Exemplo n.º 1
0
def main():
    """Compare the minimal-distance column choice with random column choices.

    Reads the rectangle data via ``fd.prepare_rectangle_data(sys.argv)`` and,
    for every K in ``range(exp_start, exp_end, exp_step)``:

    * collects the accuracies of ``experiment_number`` random experiments
      (``run_experiments``);
    * evaluates the mapping built from the K columns with the smallest
      total variation distance;
    * saves a histogram of the random accuracies, with the test accuracy and
      the random-guess accuracy marked, to
      ``Experiments/Experiment5/k<K>.png``.

    Does nothing when ``prepare_rectangle_data`` returns ``None`` for the
    rectangle. ``exp_start``, ``exp_end``, ``exp_step``, ``experiment_number``
    and ``bins_num`` are names defined outside this function.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]

    # minimal distance approach
    # The column ranking depends only on the data, not on K, so it is computed
    # once instead of once per iteration.
    # NOTE(review): assumes calculate_total_var_dist has no side effects on
    # lines_rectangle - confirm.
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    # Accuracy marked as 'Random guess' on the plots. It is derived from the
    # responses only, so it is computed once. A separate array is used (rather
    # than rebinding correct_responces) so that the fd helpers receive the
    # responses in the same form on every iteration.
    # NOTE(review): responses[responses] counts True entries only if the
    # responses are booleans - confirm against prepare_rectangle_data.
    responses = np.array(correct_responces)
    p = len(responses[responses]) / float(len(responses))
    random_acc = p**2 + (1.0 - p)**2

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = run_experiments(vec_size, experiment_number,
                                        line_len, lines_rectangle,
                                        correct_responces)

        test_cols = sorted_cols_indices[:vec_size]
        test_mapping = fd.generate_mapping(lines_rectangle,
                                           correct_responces, test_cols)
        test_accuracy = fd.evaluate_mapping_accuracy(
            lines_rectangle, correct_responces, test_cols, test_mapping)

        percentile = int(find_percentile(accuracy_list, test_accuracy))

        # plotting the histogram
        fig = plt.figure(cur_figure, figsize=(10, 6))
        cur_figure += 1
        axis = plt.gca()
        axis.set_xlim([-0.1, 1.1])
        fig.suptitle(
            'C = {3}, K = {0}, Experiments: {4}\nTest accuracy: {1}, Percentile: {2}'
            .format(vec_size, test_accuracy, percentile, line_len,
                    experiment_number))

        plt.hist(accuracy_list, bins=bins_num)
        test_line = plt.axvline(test_accuracy,
                                c='r',
                                label='Test accuracy')
        rnd_line = plt.axvline(random_acc, c='g', label='Random guess')
        plt.xlabel('Accuracy')
        plt.ylabel('Appearances')
        plt.legend(handles=[test_line, rnd_line])
        plt.ioff()
        plt.savefig("Experiments/Experiment5/k{0}.png".format(vec_size))
        # pyplot keeps every figure alive until it is closed; release it so
        # the figures do not pile up over the K values.
        plt.close(fig)
Exemplo n.º 2
0
def run_experiments(vec_size, experiment_num, line_length, lines, answers):
    """Collect mapping accuracies over repeated random column selections.

    For each of ``experiment_num`` trials, columns are drawn with
    ``select_random_spaced_cols(vec_size, line_length)``, a mapping is built
    from ``lines`` and ``answers`` for those columns, and its accuracy is
    evaluated. Start/finish timestamps and a progress line every 1000 trials
    are printed.

    Returns:
        list with one accuracy value per trial, in trial order.
    """
    print("Experiment K={0} started: {1}".format(vec_size,
                                                 datetime.datetime.now()))
    results = []
    for trial in xrange(experiment_num):
        # Progress report on every 1000th trial, except the very first one.
        if trial and not trial % 1000:
            print("Completed: ({0}, {1})".format(vec_size, trial))
        chosen_cols = select_random_spaced_cols(vec_size, line_length)
        col_mapping = fd.generate_mapping(lines, answers, chosen_cols)
        results.append(
            fd.evaluate_mapping_accuracy(lines, answers, chosen_cols,
                                         col_mapping))

    print("Experiment K={0} finished: {1}".format(vec_size,
                                                  datetime.datetime.now()))
    return results
Exemplo n.º 3
0
def main():
    """Compare the minimal-distance column choice with random column choices.

    Reads the rectangle data via ``fd.prepare_rectangle_data(sys.argv)`` and,
    for every K in ``range(exp_start, exp_end, exp_step)``:

    * collects the accuracies of ``experiment_number`` random experiments
      (``run_experiments``);
    * evaluates the mapping built from the K columns with the smallest
      total variation distance;
    * saves a histogram of the random accuracies, with the test accuracy
      marked, to ``Experiments/Experiment1/k<K>.png``.

    Does nothing when ``prepare_rectangle_data`` returns ``None`` for the
    rectangle. ``exp_start``, ``exp_end``, ``exp_step``, ``experiment_number``
    and ``bins_num`` are names defined outside this function.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]

    # minimal distance approach
    # The column ranking depends only on the data, not on K, so it is computed
    # once instead of once per iteration.
    # NOTE(review): assumes calculate_total_var_dist has no side effects on
    # lines_rectangle - confirm.
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = run_experiments(vec_size, experiment_number,
                                        line_len, lines_rectangle,
                                        correct_responces)

        test_cols = sorted_cols_indices[:vec_size]
        test_mapping = fd.generate_mapping(lines_rectangle,
                                           correct_responces, test_cols)
        test_accuracy = fd.evaluate_mapping_accuracy(
            lines_rectangle, correct_responces, test_cols, test_mapping)

        percentile = int(find_percentile(accuracy_list, test_accuracy))

        # plotting the histogram
        fig = plt.figure(cur_figure, figsize=(6, 6))
        cur_figure += 1
        fig.suptitle(
            'C = {3}, K = {0}, Experiments: {4}\nTest accuracy: {1}, Percentile: {2}'
            .format(vec_size, test_accuracy, percentile, line_len,
                    experiment_number))

        plt.hist(accuracy_list, bins=bins_num)
        plt.axvline(test_accuracy, c='r', label='Test accuracy')
        plt.xlabel('Accuracy')
        plt.ylabel('Appearances')
        plt.ioff()
        plt.savefig("Experiments/Experiment1/k{0}.png".format(vec_size))
        # pyplot keeps every figure alive until it is closed; release it so
        # the figures do not pile up over the K values.
        plt.close(fig)
Exemplo n.º 4
0
def main():
    """Plot how ``improve_solution`` changes accuracy for each K.

    Reads the rectangle data via ``fd.prepare_rectangle_data(sys.argv)``.
    For every K in ``range(exp_start, exp_end, exp_step)`` the K columns with
    the smallest total variation distance are used to build a mapping, which
    is passed to ``improve_solution``; the accuracy list it returns is
    stored. Afterwards one line plot per K is saved to
    ``Experiments/Experiment4/2k<K>.png``. All plots share the same axis
    limits, derived from the overall accuracy range and the longest list.

    Does nothing when ``prepare_rectangle_data`` returns ``None`` for the
    rectangle. ``exp_start``, ``exp_end`` and ``exp_step`` are names defined
    outside this function.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]
    total_variation_dist = su.calculate_total_var_dist(
        lines_rectangle, line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    all_lists = {}

    # Running extremes over every K, used for the shared axis limits.
    min_acc = 1.0
    max_acc = 0.0
    max_size = 0

    for vec_size in range(exp_start, exp_end, exp_step):
        print("Experiment K={0} started: {1}".format(
            vec_size, datetime.datetime.now()))
        working_cols = sorted_cols_indices[:vec_size]
        mapping = fd.generate_mapping(lines_rectangle, correct_responces,
                                      working_cols)
        accuracy_list = improve_solution(lines_rectangle,
                                         correct_responces, working_cols,
                                         mapping, line_len)
        all_lists[vec_size] = accuracy_list

        min_acc = min(min_acc, np.min(accuracy_list))
        max_acc = max(max_acc, np.max(accuracy_list))
        max_size = max(max_size, len(accuracy_list))

        print("Experiment K={0} finished: {1}".format(
            vec_size, datetime.datetime.now()))

    # Pad the y-range by 10% on each side and leave one extra step on the
    # right of the x-range.
    delta = (max_acc - min_acc) * 0.1
    min_acc -= delta
    max_acc += delta
    max_size += 1

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = all_lists[vec_size]
        # plotting
        fig = plt.figure(cur_figure, figsize=(10, 6))
        fig.suptitle('Accuracy improvement\nC = {1}, K = {0}'.format(
            vec_size, line_len))
        axis = plt.gca()
        axis.set_ylim([min_acc, max_acc])
        axis.set_xlim([-1, max_size])
        cur_figure += 1
        plt.plot(range(len(accuracy_list)), accuracy_list, 'b')

        plt.ylabel('Accuracy')
        plt.xlabel('Step')

        plt.savefig("Experiments/Experiment4/2k{0}.png".format(vec_size))
        # pyplot keeps every figure alive until it is closed; release it so
        # the figures do not pile up over the K values.
        plt.close(fig)
Exemplo n.º 5
0
def main():
    """Plot accuracy of sliding K-column windows over the ranked columns.

    Reads the rectangle data via ``fd.prepare_rectangle_data(sys.argv)`` and
    ranks the columns by total variation distance. For every K in
    ``range(exp_start, exp_end, exp_step)`` a window of K consecutive ranked
    columns is slid over the ranking; for each window a mapping is generated
    and its accuracy evaluated. One scatter plot per K, with a fitted
    linear-regression line, is saved to ``Experiments/Experiment2/k<K>.png``.
    All plots share the same axis limits.

    Does nothing when ``prepare_rectangle_data`` returns ``None`` for the
    rectangle. ``exp_start``, ``exp_end`` and ``exp_step`` are names defined
    outside this function.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]
    total_variation_dist = su.calculate_total_var_dist(
        lines_rectangle, line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    all_lists = {}
    # Running extremes over every K, used for the shared axis limits.
    min_acc = 1.0
    max_acc = 0.0
    max_size = 0
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = []
        # i is the exclusive end of the window in the ranked column list.
        for i in range(vec_size, line_len + 1):
            working_cols = sorted_cols_indices[i - vec_size:i]
            test_mapping = fd.generate_mapping(lines_rectangle,
                                               correct_responces,
                                               working_cols)
            test_accuracy = fd.evaluate_mapping_accuracy(
                lines_rectangle, correct_responces, working_cols,
                test_mapping)
            accuracy_list.append(test_accuracy)

            min_acc = min(min_acc, test_accuracy)
            max_acc = max(max_acc, test_accuracy)

        max_size = max(max_size, len(accuracy_list))
        all_lists[vec_size] = accuracy_list

    # Pad the y-range by 10% on each side and the x-range by 5 steps.
    delta = (max_acc - min_acc) * 0.1
    min_acc -= delta
    max_acc += delta
    max_size += 5

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = all_lists[vec_size]
        # plotting
        fig = plt.figure(cur_figure, figsize=(10, 6))
        cur_figure += 1
        axis = plt.gca()
        axis.set_ylim([min_acc, max_acc])
        axis.set_xlim([-5, max_size])
        fig.suptitle('C = {1}, K = {0}'.format(vec_size, line_len))
        x = np.arange(1, len(accuracy_list) + 1)
        y = np.array(accuracy_list)
        plt.plot(x, y, 'bo')

        # plotting linear regression
        # With K larger than the line length there are no windows and hence
        # no points; fitting would raise, so the line is skipped in that case.
        if len(accuracy_list) > 0:
            x_pred = x.reshape(-1, 1)
            model = LinearRegression(n_jobs=8)
            model.fit(x_pred, y)
            y_pred = model.predict(x_pred)
            line = plt.plot(x, y_pred)
            plt.setp(line, 'color', 'r', 'linewidth', 2.0)

        plt.ylabel('Accuracy')
        plt.xlabel('First column rank')
        plt.savefig("Experiments/Experiment2/k{0}.png".format(vec_size))
        # pyplot keeps every figure alive until it is closed; release it so
        # the figures do not pile up over the K values.
        plt.close(fig)