def main():
    """Compare the lowest-distance column choice with random column choices.

    For every vector size K in ``range(exp_start, exp_end, exp_step)``:

    * ``run_experiments`` produces ``experiment_number`` accuracies obtained
      from random column selections;
    * the K columns with the smallest values returned by
      ``su.calculate_total_var_dist`` are used to build and evaluate a
      "test" mapping;
    * a histogram of the random accuracies is saved to
      ``Experiments/Experiment5/k<K>.png`` with the test accuracy (red) and
      the random-guess accuracy (green) marked as vertical lines.

    Nothing is done when ``fd.prepare_rectangle_data`` yields no rectangle.

    The column ranking and the random-guess accuracy do not depend on K, so
    they are computed once before the loop.  ``correct_responces`` is no
    longer rebound to an ndarray inside the loop, so the ``fd`` helpers and
    ``run_experiments`` receive the same object on every iteration.
    """
    lines_rectangle, correct_responces, lengths_list = (
        fd.prepare_rectangle_data(sys.argv))
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]

    # Minimal distance approach: rank the columns once, ascending by the
    # value computed for each column.
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    # Accuracy of guessing at random with the same share of positive answers.
    # count_nonzero equals len(arr[arr]) for a boolean array and, unlike mask
    # indexing, also counts positives when the responses are 0/1 integers.
    responses = np.asarray(correct_responces)
    p = np.count_nonzero(responses) / float(len(responses))
    random_acc = p**2 + (1.0 - p)**2

    sizes = range(exp_start, exp_end, exp_step)
    for cur_figure, vec_size in enumerate(sizes, 1):
        accuracy_list = run_experiments(vec_size, experiment_number, line_len,
                                        lines_rectangle, correct_responces)

        test_cols = sorted_cols_indices[:vec_size]
        test_mapping = fd.generate_mapping(lines_rectangle, correct_responces,
                                           test_cols)
        test_accuracy = fd.evaluate_mapping_accuracy(
            lines_rectangle, correct_responces, test_cols, test_mapping)
        percentile = int(find_percentile(accuracy_list, test_accuracy))

        # Plotting the histogram.
        # NOTE(review): figures are never closed; consider plt.close(fig)
        # after saving if nothing later needs them - confirm with callers.
        fig = plt.figure(cur_figure, figsize=(10, 6))
        axis = plt.gca()
        axis.set_xlim([-0.1, 1.1])
        fig.suptitle(
            'C = {3}, K = {0}, Experiments: {4}\nTest accuracy: {1}, Percentile: {2}'
            .format(vec_size, test_accuracy, percentile, line_len,
                    experiment_number))
        plt.hist(accuracy_list, bins=bins_num)
        test_line = plt.axvline(test_accuracy, c='r', label='Test accuracy')
        rnd_line = plt.axvline(random_acc, c='g', label='Random guess')
        plt.xlabel('Accuracy')
        plt.ylabel('Appearances')
        plt.legend(handles=[test_line, rnd_line])
        plt.ioff()
        plt.savefig("Experiments/Experiment5/k{0}.png".format(vec_size))
def run_experiments(vec_size, experiment_num, line_length, lines, answers):
    """Run ``experiment_num`` random-column experiments for one vector size.

    Each experiment asks ``select_random_spaced_cols`` for ``vec_size``
    columns out of ``line_length``, builds a mapping for those columns with
    ``fd.generate_mapping`` and scores it with
    ``fd.evaluate_mapping_accuracy``.

    Start and finish timestamps are printed, plus a progress line after
    every 1000 completed experiments.

    Returns the list of accuracies, one per experiment, in execution order.
    """
    print("Experiment K={0} started: {1}".format(vec_size,
                                                 datetime.datetime.now()))

    scores = []
    for exp_index in xrange(experiment_num):
        # Report progress on every 1000th experiment except the very first.
        if exp_index and not exp_index % 1000:
            print("Completed: ({0}, {1})".format(vec_size, exp_index))

        chosen_cols = select_random_spaced_cols(vec_size, line_length)
        col_mapping = fd.generate_mapping(lines, answers, chosen_cols)
        scores.append(
            fd.evaluate_mapping_accuracy(lines, answers, chosen_cols,
                                         col_mapping))

    print("Experiment K={0} finished: {1}".format(vec_size,
                                                  datetime.datetime.now()))
    return scores
def main():
    """Compare the lowest-distance column choice with random column choices.

    For every vector size K in ``range(exp_start, exp_end, exp_step)``:

    * ``run_experiments`` produces ``experiment_number`` accuracies obtained
      from random column selections;
    * the K columns with the smallest values returned by
      ``su.calculate_total_var_dist`` are used to build and evaluate a
      "test" mapping;
    * a histogram of the random accuracies, with the test accuracy marked as
      a red vertical line, is saved to ``Experiments/Experiment1/k<K>.png``.

    Nothing is done when ``fd.prepare_rectangle_data`` yields no rectangle.

    The column ranking does not depend on K, so it is computed once before
    the loop instead of on every iteration.
    """
    lines_rectangle, correct_responces, lengths_list = (
        fd.prepare_rectangle_data(sys.argv))
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]

    # Minimal distance approach: rank the columns once, ascending by the
    # value computed for each column.
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    sizes = range(exp_start, exp_end, exp_step)
    for cur_figure, vec_size in enumerate(sizes, 1):
        accuracy_list = run_experiments(vec_size, experiment_number, line_len,
                                        lines_rectangle, correct_responces)

        test_cols = sorted_cols_indices[:vec_size]
        test_mapping = fd.generate_mapping(lines_rectangle, correct_responces,
                                           test_cols)
        test_accuracy = fd.evaluate_mapping_accuracy(
            lines_rectangle, correct_responces, test_cols, test_mapping)
        percentile = int(find_percentile(accuracy_list, test_accuracy))

        # Plotting the histogram.
        # NOTE(review): figures are never closed; consider plt.close(fig)
        # after saving if nothing later needs them - confirm with callers.
        fig = plt.figure(cur_figure, figsize=(6, 6))
        fig.suptitle(
            'C = {3}, K = {0}, Experiments: {4}\nTest accuracy: {1}, Percentile: {2}'
            .format(vec_size, test_accuracy, percentile, line_len,
                    experiment_number))
        plt.hist(accuracy_list, bins=bins_num)
        plt.axvline(test_accuracy, c='r', label='Test accuracy')
        plt.xlabel('Accuracy')
        plt.ylabel('Appearances')
        plt.ioff()
        plt.savefig("Experiments/Experiment1/k{0}.png".format(vec_size))
def main():
    """Plot how accuracy evolves while ``improve_solution`` refines a mapping.

    The columns are ranked once (ascending) by the values returned from
    ``su.calculate_total_var_dist``.  For every vector size K in
    ``range(exp_start, exp_end, exp_step)`` the first K ranked columns and
    the mapping generated for them are handed to ``improve_solution``; the
    accuracy sequence it returns is kept per K.

    Afterwards one line plot per K is saved to
    ``Experiments/Experiment4/2k<K>.png``.  All plots share the same axis
    limits: the overall accuracy range widened by 10% on each side, and
    ``-1`` up to the longest sequence length plus one.

    Nothing is done when ``fd.prepare_rectangle_data`` yields no rectangle.
    """
    lines_rectangle, correct_responces, lengths_list = (
        fd.prepare_rectangle_data(sys.argv))
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]
    ranked_cols = np.argsort(
        su.calculate_total_var_dist(lines_rectangle, line_len))

    curves = {}
    lowest, highest, longest = 1.0, 0.0, 0

    for vec_size in range(exp_start, exp_end, exp_step):
        print("Experiment K={0} started: {1}".format(
            vec_size, datetime.datetime.now()))

        start_cols = ranked_cols[:vec_size]
        start_mapping = fd.generate_mapping(lines_rectangle,
                                            correct_responces, start_cols)
        curve = improve_solution(lines_rectangle, correct_responces,
                                 start_cols, start_mapping, line_len)
        curves[vec_size] = curve

        # Track the global extremes so that every plot uses the same scale.
        lowest = min(lowest, np.min(curve))
        highest = max(highest, np.max(curve))
        longest = max(longest, len(curve))

        print("Experiment K={0} finished: {1}".format(
            vec_size, datetime.datetime.now()))

    # Pad the shared accuracy range by 10% and leave one step of room on x.
    margin = (highest - lowest) * 0.1
    y_low = lowest - margin
    y_high = highest + margin
    x_high = longest + 1

    sizes = range(exp_start, exp_end, exp_step)
    for figure_id, vec_size in enumerate(sizes, 1):
        curve = curves[vec_size]

        fig = plt.figure(figure_id, figsize=(10, 6))
        fig.suptitle('Accuracy improvement\nC = {1}, K = {0}'.format(
            vec_size, line_len))
        axes = plt.gca()
        axes.set_ylim([y_low, y_high])
        axes.set_xlim([-1, x_high])
        plt.plot(range(len(curve)), curve, 'b')
        plt.ylabel('Accuracy')
        plt.xlabel('Step')
        plt.savefig("Experiments/Experiment4/2k{0}.png".format(vec_size))
def main():
    """Plot accuracy against the rank of the first column in a sliding window.

    The columns are ranked once (ascending) by the values returned from
    ``su.calculate_total_var_dist``.  For every vector size K in
    ``range(exp_start, exp_end, exp_step)`` a window of K consecutive ranked
    columns is slid over the ranking; at each position a mapping is
    generated and its accuracy evaluated.

    Afterwards one scatter plot per K, with a fitted ``LinearRegression``
    line drawn in red, is saved to ``Experiments/Experiment2/k<K>.png``.
    The x value is the 1-based window position.  All plots share the same
    axis limits: the overall accuracy range widened by 10% on each side, and
    ``-5`` up to the largest number of window positions plus five.

    Nothing is done when ``fd.prepare_rectangle_data`` yields no rectangle.
    """
    lines_rectangle, correct_responces, lengths_list = (
        fd.prepare_rectangle_data(sys.argv))
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]
    ranked_cols = np.argsort(
        su.calculate_total_var_dist(lines_rectangle, line_len))

    curves = {}
    lowest, highest, longest = 1.0, 0.0, 0

    for vec_size in range(exp_start, exp_end, exp_step):
        window_scores = []
        for window_end in range(vec_size, line_len + 1):
            window_cols = ranked_cols[window_end - vec_size:window_end]
            window_mapping = fd.generate_mapping(
                lines_rectangle, correct_responces, window_cols)
            score = fd.evaluate_mapping_accuracy(
                lines_rectangle, correct_responces, window_cols,
                window_mapping)
            window_scores.append(score)

            # Track the global extremes so every plot uses the same scale.
            lowest = min(lowest, score)
            highest = max(highest, score)

        longest = max(longest, len(window_scores))
        curves[vec_size] = window_scores

    # Pad the shared accuracy range by 10% and leave five steps of room on x.
    margin = (highest - lowest) * 0.1
    y_low = lowest - margin
    y_high = highest + margin
    x_high = longest + 5

    sizes = range(exp_start, exp_end, exp_step)
    for figure_id, vec_size in enumerate(sizes, 1):
        window_scores = curves[vec_size]

        fig = plt.figure(figure_id, figsize=(10, 6))
        axes = plt.gca()
        axes.set_ylim([y_low, y_high])
        axes.set_xlim([-5, x_high])
        fig.suptitle('C = {1}, K = {0}'.format(vec_size, line_len))

        ranks = np.array(range(1, len(window_scores) + 1))
        scores = np.array(window_scores)
        plt.plot(ranks, scores, 'bo')

        # Linear regression of accuracy on window position; the estimator
        # is given the ranks as a single-column 2-D array.
        rank_column = ranks.reshape(-1, 1)
        regression = LinearRegression(n_jobs=8)
        regression.fit(rank_column, scores)
        trend = plt.plot(ranks, regression.predict(rank_column))
        plt.setp(trend, 'color', 'r', 'linewidth', 2.0)

        plt.ylabel('Accuracy')
        plt.xlabel('First column rank')
        plt.savefig("Experiments/Experiment2/k{0}.png".format(vec_size))