def MBAP(pruning_rate_per_layer, is_first, is_last):
    _, mask_1 = utils.prune(L1, pruning_rate_per_layer)
    L1_ap = tf.cond(is_first, lambda: L1 * mask_1, lambda: L1)
    L2_ap = tf.nn.relu(tf.matmul(L1_ap, W2) + B2)
    _, mask_2 = utils.prune(L2_ap, pruning_rate_per_layer)
    pruned_L2_ap = tf.cond(is_last, lambda: L2_ap * mask_2, lambda: L2_ap)
    model_ap = tf.matmul(pruned_L2_ap, W3) + B3
    return model_ap
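# Note: utils.prune is not defined in these snippets. The sketch below is a
# hypothetical magnitude-based implementation, consistent only with how it is
# called here (prune(tensor, rate) -> (pruned_tensor, mask)); it is an
# assumption, not the original helper.
def prune(tensor, pruning_rate):
    # Keep the largest-magnitude (1 - pruning_rate) fraction of activations in
    # each row and zero out the rest with a binary mask.
    num_units = tf.shape(tensor)[-1]
    k = tf.cast(tf.cast(num_units, tf.float32) * (1.0 - pruning_rate), tf.int32)
    k = tf.maximum(k, 1)
    magnitudes = tf.abs(tensor)
    top_k = tf.nn.top_k(magnitudes, k=k)
    threshold = tf.reduce_min(top_k.values, axis=-1, keepdims=True)
    mask = tf.cast(magnitudes >= threshold, tensor.dtype)
    return tensor * mask, mask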
def add_event():
    if request.method == 'GET':
        return render_template('forms/add_event.html', event=None)
    form = request.form.copy()
    utils.prune(form)
    utils.format_time(form, 'start')
    utils.format_time(form, 'end')
    events.add(form)
    return redirect(url_for('hub'))
def edit_event():
    if request.method == 'GET':
        ID = request.args.get('ID')
        event = events.get(ID)
        utils.reverse_format_time(event, 'start')
        utils.reverse_format_time(event, 'end')
        return render_template('forms/add_event.html', event=event)
    form = request.form.copy()
    utils.prune(form)
    utils.format_time(form, 'start')
    utils.format_time(form, 'end')
    events.update(form['ID'], form)
    return redirect(url_for('admin'))
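# Note: in the two Flask handlers above, utils.prune mutates the copied form
# dict in place before it is stored. A minimal sketch, assuming it simply
# drops blank fields; the real helper (and utils.format_time) is not shown.
def prune(form):
    # Remove keys whose submitted value is empty so they are not persisted.
    for key in [k for k, v in form.items() if v is None or v == '']:
        del form[key]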
def MBFD(pruning_rate_per_layer):
    # L1_mb, _ = utils.prune(L1, pruning_rate_per_layer)
    L1_mb = L1
    L2_mb_prev = tf.matmul(L1_mb, W2)
    pruned_L2_mb_prev, _ = utils.prune(L2_mb_prev, pruning_rate_per_layer)
    L2_mb = tf.nn.relu(pruned_L2_mb_prev + B2)
    model_mb = tf.matmul(L2_mb, W3) + B3
    return model_mb
def MFD(pruning_rate_per_layer):
    adv_feat_mf_1, adv_feat_mf_2 = compare(is_grad_compare)
    # L1_mf = L1 * utils.mask_vec(adjusted_feat_1, pruning_rate_per_layer)
    L1_mf = L1
    L2_mf_prev = tf.matmul(L1_mf, W2)
    _, mask = utils.prune(L2_mf_prev / adv_feat_mf_2, pruning_rate_per_layer)
    L2_mf_prev = L2_mf_prev * mask
    L2_mf = tf.nn.relu(L2_mf_prev + B2)
    model_mf = tf.matmul(L2_mf, W3) + B3
    return model_mf
def MBAP(pruning_rate_per_layer, is_first):
    _, mask_1 = utils.prune_conv_feature(h_conv1, pruning_rate_per_layer)
    h_conv1_ap = tf.cond(is_first, lambda: h_conv1 * mask_1, lambda: h_conv1)
    h_pool1_ap = tf.nn.max_pool(h_conv1_ap, ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1], padding='SAME')

    h_conv2_ap_prev = tf.nn.conv2d(h_pool1_ap, W_conv2,
                                   strides=[1, 1, 1, 1], padding='SAME')
    h_conv2_ap = tf.nn.relu(h_conv2_ap_prev + b_conv2)
    h_conv2_ap, _ = utils.prune_conv_feature(h_conv2_ap, pruning_rate_per_layer)
    h_pool2_ap = tf.nn.max_pool(h_conv2_ap, ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1], padding='SAME')

    h_conv3_ap_prev = tf.nn.conv2d(h_pool2_ap, W_conv3,
                                   strides=[1, 1, 1, 1], padding='SAME')
    h_conv3_ap = tf.nn.relu(h_conv3_ap_prev + b_conv3)
    h_conv3_ap, _ = utils.prune_conv_feature(h_conv3_ap, pruning_rate_per_layer)

    h_conv4_ap_prev = tf.nn.conv2d(h_conv3_ap, W_conv4,
                                   strides=[1, 1, 1, 1], padding='SAME')
    h_conv4_ap = tf.nn.relu(h_conv4_ap_prev + b_conv4)
    h_conv4_ap, _ = utils.prune_conv_feature(h_conv4_ap, pruning_rate_per_layer)

    h_conv5_ap_prev = tf.nn.conv2d(h_conv4_ap, W_conv5,
                                   strides=[1, 1, 1, 1], padding='SAME')
    h_conv5_ap = tf.nn.relu(h_conv5_ap_prev + b_conv5)
    h_conv5_ap, _ = utils.prune_conv_feature(h_conv5_ap, pruning_rate_per_layer)

    h_conv5_ap_flat = tf.reshape(h_conv5_ap, [-1, 8 * 8 * 128])
    h_fc1_ap_prev = tf.matmul(h_conv5_ap_flat, W_fc1)
    h_fc1_ap = tf.nn.relu(h_fc1_ap_prev + b_fc1)
    h_fc1_ap, _ = utils.prune(h_fc1_ap, pruning_rate_per_layer)

    model_ap = tf.matmul(h_fc1_ap, W_fc2) + b_fc2
    return model_ap
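# Note: utils.prune_conv_feature is also undefined here. A plausible sketch is
# to flatten each example's NHWC feature map, reuse the dense prune() sketched
# earlier, and reshape back; this is an assumption about its behavior.
def prune_conv_feature(feature_map, pruning_rate):
    shape = tf.shape(feature_map)
    flat = tf.reshape(feature_map, [shape[0], -1])
    pruned_flat, mask_flat = prune(flat, pruning_rate)
    return tf.reshape(pruned_flat, shape), tf.reshape(mask_flat, shape)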
def prune(splited):
    # Drop spans whose text becomes empty after pruning
    old2new = dict()
    new_splited = list()
    for (idx, span) in enumerate(splited):
        span['text'] = utils.prune(span['text'])
        if span['text'] == '':
            continue
        old2new[idx] = len(new_splited)
        new_splited.append(span)
    # Remap ifobj indices to the surviving spans
    for item in new_splited:
        if item['ifobj'] is not None:
            item['ifobj'] = [old2new[obj] for obj in item['ifobj'] if obj in old2new]
    return new_splited
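# Note: in the function above, utils.prune is applied to a span's text so that
# blank or whitespace-only spans collapse to ''. A hypothetical string-level
# sketch under that assumption (named prune_text to avoid clashing with the
# prune() defined above).
def prune_text(text):
    return ' '.join(text.split())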
def __init__(self, base_dir, batch_size, mode=1, cls=1, prune=None, de_norm=False):
    TRAIN = 1
    TEST = 2
    self.base_dir = base_dir
    self.batch_size = batch_size
    ds_dir = os.path.join(self.base_dir, 'dataset/class_{}'.format(cls))
    if mode == TRAIN:
        self.x = utils.pickle_load(ds_dir + '/imgs_train.pkl')
        self.y = utils.pickle_load(ds_dir + '/marks_train.pkl')
    elif mode == TEST:
        self.x = utils.pickle_load(ds_dir + '/imgs_test.pkl')
        self.y = utils.pickle_load(ds_dir + '/marks_test.pkl')
    else:
        raise ValueError("Invalid option, should be one of {} or {}".format(TRAIN, TEST))

    if de_norm:
        self.x = utils.de_norm(self.x)
        self.y = utils.de_norm(self.y)

    self.labels = np.array([1 if np.sum(mask) > 0 else 0 for mask in self.y])
    if prune is not None:
        self.x, self.y, self.labels = utils.prune(self.x, self.y, self.labels, prune)

    self.x = utils.norm(self.x)
    self.y = utils.norm(self.y)
    self.classes = np.unique(self.labels)
    self.per_class_ids = {}
    ids = np.array(range(len(self.x)))
    for c in self.classes:
        self.per_class_ids[c] = ids[self.labels == c]
    print(Counter(self.labels))
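# Note: utils.prune(x, y, labels, prune) is not shown. A hedged guess,
# consistent with the call above, is that it caps the number of kept samples
# per label at `prune`; the sketch below is hypothetical, not the original.
import numpy as np

def prune(x, y, labels, max_per_label):
    keep = []
    for c in np.unique(labels):
        idx = np.where(labels == c)[0]
        keep.extend(idx[:max_per_label].tolist())
    keep = np.sort(np.array(keep))
    return x[keep], y[keep], labels[keep]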
def plot_jct_cdf(logfile_paths, labels, v100s, p100s, k80s, max_input_job_rate,
                 policies, min_job_id, max_job_id, partition=True,
                 finish_time_fairness=False, output_directory=None):
    from utils import get_jcts, prune
    lambdas = list(set([x[5] for x in logfile_paths]))
    lambdas.sort(reverse=True)
    print(policies)
    for l in lambdas:
        handles_in_legend = []
        labels_in_legend = []
        input_job_rate = 3600.0 / l
        if input_job_rate > max_input_job_rate:
            continue
        print("Input job rate: %.2f" % input_job_rate)
        plt.figure(figsize=(8, 3))
        if partition:
            axes = [
                plt.subplot2grid((1, 2), (0, 0), rowspan=1),
                plt.subplot2grid((1, 2), (0, 1), rowspan=1),
            ]
            titles = ["Short jobs", "Long jobs"]
        else:
            axes = [
                plt.subplot2grid((1, 1), (0, 0), rowspan=1),
            ]
            titles = [None]
            if not finish_time_fairness:
                axes.append(axes[0].inset_axes([0.4, 0.2, 0.5, 0.6]))
                titles.append(None)
        if finish_time_fairness:
            relevant_logfile_paths = list(
                reversed(prune(logfile_paths, v100s, p100s, k80s, "isolated", seed=0)))
            relevant_logfile_paths = [x for x in relevant_logfile_paths if x[0] == l]
            if len(relevant_logfile_paths) != 1:
                continue
            isolated_jcts = get_jcts(relevant_logfile_paths[0][1], seed=0,
                                     min_job_id=min_job_id, max_job_id=max_job_id)
            isolated_jcts.sort(key=lambda x: x[1])
        linestyles = ['--', '-.', ':', '--', '-.']
        for i, policy in enumerate(policies):
            relevant_logfile_paths = list(
                reversed(prune(logfile_paths, v100s, p100s, k80s, policy, seed=0)))
            relevant_logfile_paths = [x for x in relevant_logfile_paths if x[0] == l]
            if len(relevant_logfile_paths) != 1:
                continue
            jcts = get_jcts(relevant_logfile_paths[0][1], seed=0,
                            min_job_id=min_job_id, max_job_id=max_job_id)
            jcts.sort(key=lambda x: x[1])
            partition_point = int(len(jcts) * 0.8)
            if finish_time_fairness:
                jcts = [x[0] / y[0] for (x, y) in zip(jcts, isolated_jcts)]
            else:
                jcts = [x[0] for x in jcts]
            print("%s: %.2f" % (policy, np.mean(jcts)))
            if partition:
                jcts = np.split(np.array(jcts), [partition_point])
            else:
                jcts = [np.array(jcts), np.array(jcts)]
            for j, (ax, jcts_segment) in enumerate(zip(axes, jcts)):
                jcts_segment.sort()
                percentiles = [(k + 1) / len(jcts_segment)
                               for k in range(len(jcts_segment))]
                if "Gavel" in labels[policy]:
                    handle = ax.plot(jcts_segment, percentiles,
                                     color=current_palette[i],
                                     linestyle='-', linewidth=3)
                else:
                    handle = ax.plot(jcts_segment, percentiles,
                                     color=current_palette[i],
                                     linestyle=linestyles[i])
                if j == 0:
                    handles_in_legend.append(handle[0])
                    labels_in_legend.append(labels[policy])
        for i, (ax, title) in enumerate(zip(axes, titles)):
            if finish_time_fairness:
                if partition:
                    ax.set_xlabel("FTF" + "\n" + title)
                else:
                    ax.set_xlabel("FTF")
                ax.set_xlim([0, 4])
                ax.set_xticks([0, 1, 2, 3, 4])
                ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
            else:
                if partition:
                    ax.set_xlabel("JCT (hrs)" + "\n" + title)
                else:
                    if i == 0:
                        ax.set_xlabel("JCT (hrs)")
                if not partition:
                    if i == 0:
                        ax.set_xlim([0, 500])
                        ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
                    else:
                        ax.set_xlim([0, 25])
                        ax.set_xticks([0, 5, 10, 15, 20, 25])
                        ax.set_yticks([0, 0.33, 0.67, 1.0])
            if i == 0:
                ax.set_ylabel("Fraction of jobs")
            if partition:
                if i > 0:
                    ax.set_yticklabels(["", "", "", "", "", ""])
            ax.set_ylim([0, 1.0])
            sns.despine()
        if not partition and not finish_time_fairness:
            axes[0].indicate_inset_zoom(axes[1], linewidth=3)
        leg = plt.figlegend(handles=handles_in_legend, labels=labels_in_legend,
                            ncol=3, frameon=False, loc='upper center')
        bb = leg.get_bbox_to_anchor().inverse_transformed(axes[0].transAxes)
        bb.y0 += 0.22
        bb.y1 += 0.22
        leg.set_bbox_to_anchor(bb, transform=axes[0].transAxes)
        if output_directory is not None:
            output_filename = os.path.join(
                output_directory,
                "input_job_rate=%d.pdf" % (input_job_rate * 10))
            with PdfPages(output_filename) as pdf:
                pdf.savefig(bbox_inches='tight')
        plt.show()
def plot_metric_vs_inverse_lambda_different_metric_fns(
        logfile_paths, labels, v100s, p100s, k80s, policies, metric_fns,
        metric_fn_labels, metric_label, xmin=0, xmax=None, ymin=0, ymax=None,
        output_filename=None, verbose=False):
    from utils import prune
    plt.figure(figsize=(8, 3))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    data = {"input_job_rate": [], "metric": [], "seed": [], "policy": []}
    print(policies)
    for policy in policies:
        relevant_logfile_paths = list(
            reversed(prune(logfile_paths, v100s, p100s, k80s, policy)))
        for metric_fn_label, metric_fn in zip(metric_fn_labels, metric_fns):
            lambdas = [x[0] for x in relevant_logfile_paths]
            input_job_rates = [3600.0 / x for x in lambdas]
            metrics = [metric_fn(x[1]) for x in relevant_logfile_paths]
            seeds = [x[2] for x in relevant_logfile_paths]
            # Use a separate name so the `policies` argument being iterated
            # over is not shadowed.
            policy_column = [
                labels[policy] + " (%s)" % metric_fn_label
                for i in range(len(metrics))
            ]
            data["input_job_rate"] += input_job_rates
            data["metric"] += metrics
            data["seed"] += seeds
            data["policy"] += policy_column
    if verbose:
        import pandas as pd
        df = pd.DataFrame(data)
        print(df.groupby(["policy", "input_job_rate"]).mean())
    sns.lineplot(x='input_job_rate', y='metric', style='policy', hue='policy',
                 data=data, ci='sd', markers=True)
    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])
    sns.despine()
    leg = plt.legend(frameon=False)
    bb = leg.get_bbox_to_anchor().inverse_transformed(ax.transAxes)
    bb.y0 += 0.22
    bb.y1 += 0.22
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)
    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')
    plt.show()
def plot_metric_vs_inverse_lambda_different_mechanisms(
        all_logfile_paths, labels, label_modifiers, v100s, p100s, k80s,
        policies, metric_fn, metric_label, xmax=None, ymax=None,
        output_filename=None, extrapolate=False):
    from utils import prune
    plt.figure(figsize=(4.5, 3))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    data = {"input_job_rate": [], "metric": [], "seed": [], "policy": []}
    print(policies)
    for policy in policies:
        for logfile_paths, label_modifier in zip(all_logfile_paths,
                                                 label_modifiers):
            relevant_logfile_paths = list(
                reversed(prune(logfile_paths, v100s, p100s, k80s, policy)))
            label = labels[policy] + label_modifier
            lambdas = []
            metrics = []
            seeds = []
            for x in relevant_logfile_paths:
                metric = metric_fn(x[1])
                if metric is not None:
                    lambdas.append(x[0])
                    metrics.append(metric)
                    seeds.append(x[2])
            input_job_rates = [3600.0 / x for x in lambdas]
            # Use a separate name so the `policies` argument being iterated
            # over is not shadowed.
            policy_column = [label for i in range(len(metrics))]
            data["input_job_rate"] += input_job_rates
            data["metric"] += metrics
            data["seed"] += seeds
            data["policy"] += policy_column
            if len(input_job_rates) > 0 and extrapolate:
                data["input_job_rate"] += [max(input_job_rates) + 0.2]
                data["metric"] += [105.0]
                data["seed"] += [0]
                data["policy"] += [label]
    sns.lineplot(x='input_job_rate', y='metric', style='policy', hue='policy',
                 data=data, ci='sd', markers=True)
    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([0, xmax])
    ax.set_ylim([0, ymax])
    sns.despine()
    leg = plt.legend(loc='upper left', frameon=False)
    bb = leg.get_bbox_to_anchor().inverse_transformed(ax.transAxes)
    bb.y0 += 0.1
    bb.y1 += 0.1
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)
    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')
    plt.show()
def pruning(self, num_keep):
    with torch.no_grad():
        self.mask_normal = prune(self.alphas_normal, num_keep, self.mask_normal)
        self.mask_reduce = prune(self.alphas_reduce, num_keep, self.mask_reduce)
def pruning(self, num_keep):
    self._mask = prune(self._arch_parameters, num_keep, self._mask)
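# Note: in the two pruning() methods above, prune(alphas, num_keep, mask)
# returns an updated mask over architecture parameters. A minimal sketch,
# assuming a single 2-D alpha tensor, a boolean mask, and top-num_keep
# selection per row among entries not already pruned; this is an assumption,
# not the original implementation.
import torch

def prune(alphas, num_keep, prev_mask):
    scores = alphas.detach().clone()
    scores[~prev_mask] = float('-inf')  # never revive already-pruned entries
    top_idx = scores.topk(num_keep, dim=-1).indices
    new_mask = torch.zeros_like(prev_mask)
    new_mask.scatter_(-1, top_idx, True)
    return new_mask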
def main(data, target, args):
    # Names for the various output files
    model_name = 'model_{0}_{1}_{2}.lp'.format(args.data, args.kernel, args.function)
    param_name = 'model_{0}_{1}_{2}.prm'.format(args.data, args.kernel, args.function)
    solution_name = 'solution_{0}_{1}_{2}.sol'.format(args.data, args.kernel, args.function)
    ultrametric_name = 'ultrametric_{0}_{1}_{2}'.format(args.data, args.kernel, args.function)
    var_name = 'var_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data, args.kernel, args.function)
    obj_name = 'obj_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data, args.kernel, args.function)
    laminar_name = 'laminar_{0}_{1}_{2}.pkl'.format(args.data, args.kernel, args.function)
    tree_name = 'lp_tree_{0}_{1}_{2}_{3}.pdf'.format(args.data, args.kernel, args.function, args.eps)
    err_dict_name = 'error_{0}_{1}_{2}_{3}.txt'.format(args.data, args.kernel, args.function, args.eps)

    # Test other hierarchical clustering algorithms
    # Materialize as a list: a map object would be exhausted after its first use.
    one_target = [x + 1 for x in target]
    k = args.prune
    y = pdist(data, metric='euclidean')
    Z = []
    Z.append(hac.linkage(y, method='single'))
    Z.append(hac.linkage(y, method='complete'))
    Z.append(hac.linkage(y, method='average'))
    ward = hac.linkage(data, method='ward')
    Z.append(ward)
    errors = []
    while Z:
        x = Z.pop(0)
        pred = hac.fcluster(x, k, 'maxclust')
        # print('pred = ', pred)
        err = utils.error(list(pred), one_target)
        errors.append(err)

    # K means
    clf = KMeans(k)
    pred = clf.fit_predict(data)
    pred = map(lambda x: x + 1, pred)
    err = utils.error(list(pred), one_target)
    errors.append(err)
    # print('kmeans = ', pred)
    error_dict = {'single linkage': errors[0],
                  'complete linkage': errors[1],
                  'average linkage': errors[2],
                  'ward': errors[3]}
    error_dict['kmeans'] = errors[4]
    print(error_dict)

    # initialize model
    if args.function == 'linear':
        m = init_model(data, args.kernel, args.triangle, utils.linear)
    if args.function == 'quadratic':
        m = init_model(data, args.kernel, args.triangle, utils.quadratic)
    if args.function == 'cubic':
        m = init_model(data, args.kernel, args.triangle, utils.cubic)
    if args.function == 'logarithm':
        m = init_model(data, args.kernel, args.triangle, utils.logarithm)
    if args.function == 'exponential':
        m = init_model(data, args.kernel, args.triangle, utils.exponential)
    m._n = data.shape[0]

    # Check if reading solution from file
    if args.solution:
        print('Reading LP solution from ', args.solution)
        solution_dict = read_solution(m, args.solution)
    else:
        start = time.time()
        print('Optimizing over model')
        m.optimize()
        flag = args.triangle
        while flag and time.time() - start < args.time:
            print("Time_diff = {}".format(time.time() - start))
            m.optimize()
            # Feed solution to separation oracle
            flag = separation_oracle(m, args.triangle)
        end = time.time()
        print('Total time to optimize = {0}'.format(end - start))
        print('Writing solution to ', solution_name)
        m.write(solution_name)
        print('Saving model to ', model_name)
        m.write(model_name)
        solution_dict = get_solution_dict(solution_name)
    # print('Triangle inequality satisfied: ', check_triangle_constraints(m))
    # print('Spreading constraints satisfied: ', check_spreading_constraints(m))

    # Get ultrametric from LP
    print('Rounding LP')
    if args.function == 'linear':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.linear)
        utils.inverse_ultrametric(d, utils.inverse_linear)
    elif args.function == 'quadratic':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.quadratic)
        utils.inverse_ultrametric(d, utils.inverse_quadratic)
    elif args.function == 'cubic':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.cubic)
        utils.inverse_ultrametric(d, utils.inverse_cubic)
    elif args.function == 'exponential':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.exponential)
        utils.inverse_ultrametric(d, utils.inverse_exponential)
    elif args.function == 'logarithm':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.logarithm)
        utils.inverse_ultrametric(d, utils.inverse_logarithm)
    print('d = ', d)
    cost = utils.get_cost(m, d)
    print('Cost of hierarchy: ', cost)
    print('Check ultrametric: ', utils.check_ultrametric(d))
    # print(d)
    total_obj = utils.get_total(m)
    print('Total objective = ', total_obj)
    print('Scaled cost = ', cost / total_obj)
    utils.complete_ultrametric(d)
    print('Building laminar list')
    L = utils.build_laminar_list(d)
    # print('Laminar list = ', L)
    print('Check laminar: ', utils.test_laminar(L))
    labels = [1] * m._n
    pruned = utils.prune(L, one_target, k, labels)
    print('Error on pruning: ', pruned[0])
    error_dict['lp rounding'] = pruned[0]
    # Write the error dictionary as text, not bytes.
    with open(err_dict_name, 'w') as f:
        f.write(str(error_dict))

    # Build and draw the hierarchy
    G = utils.build_hierarchy(d)
    print('Drawing tree to ', tree_name)
    utils.draw(G, target, m._n, tree_name)
def plot_metric_vs_inverse_lambda(logfile_paths, labels, v100s, p100s, k80s,
                                  policies, metric_fn, metric_label,
                                  xmax=None, ymax=None, output_filename=None,
                                  extrapolate=False, verbose=False):
    from utils import prune
    plt.figure(figsize=(8, 3))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    data = {"input_job_rate": [], "metric": [], "seed": [], "policy": []}
    print(policies)
    for policy in policies:
        relevant_logfile_paths = list(
            reversed(prune(logfile_paths, v100s, p100s, k80s, policy)))
        lambdas = [x[0] for x in relevant_logfile_paths]
        input_job_rates = [3600.0 / x for x in lambdas]
        metrics = [metric_fn(x[1]) for x in relevant_logfile_paths]
        seeds = [x[2] for x in relevant_logfile_paths]
        # Use a separate name so the `policies` argument being iterated over
        # is not shadowed.
        policy_column = [labels[policy] for i in range(len(metrics))]
        data["input_job_rate"] += input_job_rates
        data["metric"] += metrics
        data["seed"] += seeds
        data["policy"] += policy_column
        if len(input_job_rates) > 0 and extrapolate:
            data["input_job_rate"] += [max(input_job_rates) + 0.4]
            data["metric"] += [105.0]
            data["seed"] += [0]
            data["policy"] += [labels[policy]]
    if verbose:
        import pandas as pd
        df = pd.DataFrame(data)
        grouped_df = df.groupby(["policy", "input_job_rate", "seed"])
        for name_of_the_group, group in grouped_df:
            print(name_of_the_group)
            print(group.mean())
    sns.lineplot(x='input_job_rate', y='metric', style='policy', hue='policy',
                 data=data, ci='sd', markers=True)
    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([0, xmax])
    ax.set_ylim([0, ymax])
    sns.despine()
    leg = plt.legend(loc='upper left', frameon=False)
    bb = leg.get_bbox_to_anchor().inverse_transformed(ax.transAxes)
    bb.y0 += 0.22
    bb.y1 += 0.22
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)
    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')
    plt.show()
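# Note: in the plotting helpers above, utils.prune filters the simulation log
# index down to one cluster configuration and policy. The entry layout below
# (path, v100s, p100s, k80s, policy, lambda, seed) and the returned
# (lambda, path, seed) tuples are assumptions inferred from how the results
# are indexed; this is a sketch only, not the original helper.
def prune(logfile_paths, v100s, p100s, k80s, policy, seed=None):
    relevant = []
    for (path, v, p, k, pol, lam, s) in logfile_paths:
        if (v, p, k, pol) != (v100s, p100s, k80s, policy):
            continue
        if seed is not None and s != seed:
            continue
        relevant.append((lam, path, s))
    return relevant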
def __init__(self, data_src, batch_size=5, dataset='MNIST', rst=64,
             prune_classes=None, k_shot=5):
    self.batch_size = batch_size
    self.data_src = data_src
    if dataset == 'chest':
        if self.data_src == self.TEST:
            x, y = utils.load_test_data(rst)
            self.dataset_x = x
            self.dataset_y = y
        else:
            x, y = utils.load_train_data(rst)
            self.dataset_x = x
            self.dataset_y = y
    elif dataset == 'flowers':
        x, y = utils.pickle_load(BASE_DIR + '/dataset/flowers/imgs_labels.pkl')
        to_train_classes = self.to_train_classes
        to_test_classes = self.to_test_classes
        if self.data_src == self.TEST:
            to_keep = np.array([i for i, l in enumerate(y) if l in to_test_classes])
            x, y = x[to_keep], y[to_keep]
            self.dataset_x = x  # TODO start from 0
            self.dataset_y = y
        else:
            to_keep = np.array([i for i, l in enumerate(y) if l in to_train_classes])
            x, y = x[to_keep], y[to_keep]
            self.dataset_x = x  # TODO start from 0
            self.dataset_y = y
    else:
        # multi chest
        x, y = self._load_data(rst)
        x = utils.denormalize(x)
        to_train_classes = self.to_train_classes
        to_test_classes = self.to_test_classes
        if self.data_src == self.TEST:
            to_keep = []
            counter = {12: 0, 13: 0, 14: 0}
            for i, l in enumerate(y):
                if l not in to_train_classes and counter[l] < k_shot:
                    to_keep.append(i)
                    counter[l] += 1
            to_keep = np.array(to_keep)
            if len(to_keep) > 0:
                x, y = x[to_keep], y[to_keep]
            self.dataset_x = x
            self.dataset_y = np.array([l for l in y])
        else:
            to_keep = np.array([i for i, l in enumerate(y) if l in to_train_classes])
            x, y = x[to_keep], y[to_keep]
            self.dataset_x = x
            self.dataset_y = np.array([l for l in y])

    # Normalize between -1 and 1
    self.dataset_x = utils.normalize(self.dataset_x)
    print(self.dataset_x.shape[0], self.dataset_y.shape[0])
    assert (self.dataset_x.shape[0] == self.dataset_y.shape[0])

    # Compute per class instance count.
    classes = np.unique(self.dataset_y)
    self.classes = classes
    per_class_count = list()
    for c in classes:
        per_class_count.append(np.sum(np.array(self.dataset_y == c)))

    if prune_classes:
        self.dataset_x, self.dataset_y = utils.prune(
            self.dataset_x, self.dataset_y, prune_classes)
        # Recount after pruning
        per_class_count = list()
        for c in classes:
            per_class_count.append(np.sum(np.array(self.dataset_y == c)))
    self.per_class_count = per_class_count

    # List of labels
    self.label_table = [str(c) for c in range(len(self.classes))]

    # Preload all the labels.
    self.labels = self.dataset_y[:]

    # per class ids
    self.per_class_ids = dict()
    ids = np.array(range(len(self.dataset_x)))
    for c in classes:
        self.per_class_ids[c] = ids[self.labels == c]
def main(data, target, args):
    model_name = 'model_{0}_{1}_{2}.lp'.format(args.data, args.kernel, args.function)
    param_name = 'model_{0}_{1}_{2}.prm'.format(args.data, args.kernel, args.function)
    solution_name = 'solution_{0}_{1}_{2}.sol'.format(args.data, args.kernel, args.function)
    ultrametric_name = 'ultrametric_{0}_{1}_{2}'.format(args.data, args.kernel, args.function)
    var_name = 'var_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data, args.kernel, args.function)
    obj_name = 'obj_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data, args.kernel, args.function)
    laminar_name = 'laminar_{0}_{1}_{2}.pkl'.format(args.data, args.kernel, args.function)
    tree_name = 'ip_tree_{0}_{1}_{2}.pdf'.format(args.data, args.kernel, args.function)

    if args.kernel == 'cosine':
        y = pdist(data, metric='cosine')
        # Make condensed distance matrix into redundant form
        similarity = 1 - y
        similarity = squareform(similarity)
    if args.kernel == 'gaussian':
        y = pdist(data, metric='sqeuclidean')
        s = 1
        y = 1 - np.exp(-(y**2) / (2 * s**2))
        # Make condensed distance matrix into redundant form
        similarity = 1 - y
        similarity = squareform(similarity)
    if args.kernel == 'sqeuclidean':
        y = pdist(data, metric='sqeuclidean')
        similarity = -y
        similarity = squareform(similarity)

    if args.function == 'linear':
        m = init_model(data, similarity, target, utils.linear)
    elif args.function == 'quadratic':
        m = init_model(data, similarity, target, utils.quadratic)
    elif args.function == 'cubic':
        m = init_model(data, similarity, target, utils.cubic)
    elif args.function == 'exponential':
        m = init_model(data, similarity, target, utils.exponential)
    elif args.function == 'logarithm':
        m = init_model(data, similarity, target, utils.logarithm)
    else:
        exit(0)

    print('Saving model')
    m.write(model_name)
    # Use concurrent optimization
    m.params.method = 3
    # Limit memory
    m.params.NodeFileStart = 10
    # Limit number of threads
    m.params.Threads = args.num_threads
    # Set MIP Focus
    m.params.MIPFocus = 3
    # Tune parameters
    print('Tuning parameters')
    m.params.tuneResults = 1
    m.tune()
    if m.tuneResultCount > 0:
        m.getTuneResult(0)
    # Set MIP Gap
    m.params.MIPGap = 0.01
    print('Saving model parameters')
    m.write(param_name)
    print('Saving objective functions')
    with open(obj_name, 'wb') as f:
        pickle.dump(m._obj, f)
    print('Optimizing over model')
    m._n = data.shape[0]
    m.optimize(callback_function)

    if m.status == GRB.Status.OPTIMAL:
        # Write solution
        m.write(solution_name)
        print('Check binary triangle for solution: ', check_binary_triangle(m))
        # Get ultrametric
        if args.function == 'linear':
            d = get_ultrametric(m, utils.linear)
            utils.inverse_ultrametric(d, utils.inverse_linear)
        elif args.function == 'quadratic':
            d = get_ultrametric(m, utils.quadratic)
            utils.inverse_ultrametric(d, utils.inverse_quadratic)
        elif args.function == 'cubic':
            d = get_ultrametric(m, utils.cubic)
            utils.inverse_ultrametric(d, utils.inverse_cubic)
        elif args.function == 'exponential':
            d = get_ultrametric(m, utils.exponential)
            utils.inverse_ultrametric(d, utils.inverse_exponential)
        elif args.function == 'logarithm':
            d = get_ultrametric(m, utils.logarithm)
            utils.inverse_ultrametric(d, utils.inverse_logarithm)
        print('d = ', d)
        print('Check ultrametric: ', utils.check_ultrametric(d))
        cost = utils.get_cost(m, d)
        print('Cost of hierarchy = ', cost)
        total_obj = utils.get_total(m)
        print('Total cost = ', total_obj)
        print('Scaled cost = ', cost / total_obj)
        # Complete ultrametric
        utils.complete_ultrametric(d)
        # Build laminar list from d
        print('building laminar list')
        L = utils.build_laminar_list(d)
        print('L = ', L)
        print('Check laminar: ', utils.test_laminar(L))
        labels = [1] * m._n
        # Materialize as a list rather than a map object for safe reuse.
        one_target = [x + 1 for x in target]
        # Prune laminar list
        pruned = utils.prune(L, one_target, args.prune, labels)
        print('Error on pruning: ', pruned[0])
        with open(ultrametric_name, 'wb') as f:
            pickle.dump(d, f)
        # Build hierarchy
        print('Building hierarchy')
        G = utils.build_hierarchy(d)
        # Draw hierarchy
        print('Drawing hierarchy to ', tree_name)
        utils.draw(G, target, m._n, tree_name)
    elif m.status == GRB.Status.INFEASIBLE:
        # Compute IIS, for debugging purposes
        m.computeIIS()
        m.write('infeasible.ilp')