示例#1
0
文件: q4.py 项目: oryband/homework
def simulate_seperable(data_size):
    """Simulate learning a completely separable data set.

    Generates `data_size` samples with `generate_sphere_data(..., margin=0)`,
    splits them with `split_list(data, 0.75)` into a training and a test part,
    trains a weight vector on the training part, reports the test error via
    `status`, and finally plots the data together with the learned `w`.

    data_size: Number of samples to generate.
    """
    # Honour the requested size instead of a hard-coded sample count, the
    # same way simulate_increasing() forwards its data_size argument.
    data = generate_sphere_data(data_size, margin=0)
    train_data, test_data = split_list(data, 0.75)

    w = train(train_data, max_iter=500, r=0.01)
    error = test(test_data, w)
    status(train_data, test_data, error)

    plot_data(data)
    plot_w(data, w)
    show()
示例#2
0
文件: q4.py 项目: oryband/homework
def simulate_increasing(data_size, margin=0.3, max_iter=100, learning_rate=0.1,
                        steps=5, start=None, end=None):
    """Simulate learning an increasing training data set.

    Generates a data set with the given margin, splits it with
    `split_list(data, 0.75)`, then trains on `steps` prefixes of the training
    part whose sizes grow linearly from `start` to `end`. Each trained `w` is
    tested against the same test part, reported via `status`, and drawn on
    figure 0; figure 1 shows the success rate per training set size.

    data_size: Number of samples to generate.
    margin: Margin passed to `generate_sphere_data`.
    max_iter: Maximum iterations passed to `train`.
    learning_rate: Passed to `train` as `r`.
    steps: Number of training runs (training set sizes) to perform.
    start: Initial (first step) training data set size.
        Defaults to `len(train_data) // steps`.
    end: Final (last step) training data set size.
        Defaults to `len(train_data)`.
    """
    data = generate_sphere_data(data_size, margin=margin)
    train_data, test_data = split_list(data, 0.75)

    # Initialize start/end sizes if not given. Floor division keeps the sizes
    # integral (they are used as slice bounds) on both Python 2 and 3.
    if start is None:
        start = len(train_data) // steps
    if end is None:
        end = len(train_data)

    w_colors = ['b', 'c', 'm', 'y', 'k']  # w vector (line) graph color.
    last_color = len(w_colors) - 1
    w_gs = []  # w plot graphs.
    sizes = []  # Training data set sizes.
    success = []  # Success rates according to training data set sizes.
    for i in range(steps):
        # Interpolate linearly so the first step uses `start` samples and the
        # last step uses exactly `end` samples, as documented. A single step
        # has nothing to interpolate towards and simply uses `start`.
        if steps > 1:
            size = int(start + i * (end - start) // (steps - 1))
        else:
            size = int(start)
        current_train_data = train_data[:size]

        w = train(current_train_data, max_iter=max_iter, r=learning_rate)
        error = test(test_data, w)

        status(current_train_data, test_data, error)
        print('')  # Blank separator line between steps.

        # Record size-success statistics.
        sizes.append(size)
        success.append(100 - error)

        # Plot decision boundary; steps beyond the palette reuse the last
        # color.
        w_color = w_colors[min(i, last_color)]
        figure(0)
        g, = plot_w(current_train_data, w, color=w_color)
        w_gs.append(g)

    figure(0).suptitle(
        'Test data size: %d\nMaximum iterations: %d'
        % (len(test_data), max_iter))
    plot_w_legend(w_gs, sizes)
    plot_data(data)

    figure(1).suptitle('Success rate according to training set size.')
    plot_success_per_size(sizes, success)

    show()