Пример #1
0
def main():
    """Compare normal-equation and gradient-descent price predictions.

    Builds a design matrix (constant column, number of rooms, living space)
    from the training split, obtains one parameter set from
    ``normal_equation.learn`` and one from ``gradient_descent.learn``, then
    prints, for every test sample, the actual price next to both predicted
    prices and each prediction as a truncated percentage of the actual price.
    """
    train_set, test_set = split_data()

    # The stacked array has one row per feature; transposing yields one row
    # per training sample with the constant column first.
    intercept_column = np.ones(train_set.shape[0])
    X = np.matrix([
        intercept_column,
        train_set['number_of_rooms'],
        train_set['living_space'],
    ]).T
    Y = np.matrix(train_set['price']).T

    ne_theta = normal_equation.learn(X, Y)
    # presumably 0.00015 is the step size and 100 the iteration count --
    # TODO confirm against gradient_descent.learn
    gd_theta = gradient_descent.learn(X, Y, 0.00015, 100)

    samples = test_set[['number_of_rooms', 'living_space', 'price']].values

    for rooms, area, price in samples:
        # Both predictions are rounded to whole EUR before anything is printed.
        ne_price = int(round(predict(rooms, area, ne_theta)))
        gd_price = int(round(predict(rooms, area, gd_theta)))

        print('Number of rooms %s, area %s sqm:' % (rooms, area))
        print('actual price: %s EUR' % price)

        # int() truncates, so the percentage is rounded toward zero.
        ne_share = int(100. * ne_price / price)
        print('ne predict: %s EUR (%s%%)' % (ne_price, ne_share))

        gd_share = int(100. * gd_price / price)
        print('gd predict: %s EUR (%s%%)' % (gd_price, gd_share))
Пример #2
0
def _design_matrix(data_set):
    """Return the matrix [1, number_of_rooms, living_space], one row per sample."""
    return np.matrix([
        np.ones(data_set.shape[0]),
        data_set['number_of_rooms'],
        data_set['living_space'],
    ]).T


def main():
    """Fit a price model with gradient descent, report its quality and plot it.

    Trains on the 'Steglitz' split using number of rooms and living space as
    features, prints the train/test error and R squared for the final
    parameters, and shows a figure with two subplots: price against living
    space (test set, train set and test predictions) and the cost function
    value for every parameter set returned by the learner.
    """
    # NOTE(review): calling split_data() without `district` presumably uses
    # every district -- confirm against split_data.
    train_set, test_set = split_data(district='Steglitz')

    X_train = _design_matrix(train_set)
    Y_train = np.matrix(train_set['price']).T

    alpha = 0.00015
    n_iterations = 50
    # With the trailing True the learner returns a sequence of parameter sets
    # (it is indexed and fed to calc_cost_functions below); presumably one per
    # iteration -- TODO confirm against gradient_descent.learn.
    thetas = gradient_descent.learn(X_train, Y_train, alpha, n_iterations, True)
    theta = thetas[-1]

    X_test = _design_matrix(test_set)
    Y_test = np.matrix(test_set['price']).T

    test_predictions = np.dot(X_test, theta)
    costs = calc_cost_functions(X_train, Y_train, thetas)

    # Single-argument print(...) gives the same output on Python 2 and is
    # valid on Python 3; the 1-tuple keeps '%s' safe for any value type.
    print('Train error %s' % (calc_error(X_train, Y_train, theta),))
    print('Test error %s' % (calc_error(X_test, Y_test, theta),))

    print('Train R squared %s' % (r_squared(X_train, Y_train, theta),))
    print('Test R squared %s' % (r_squared(X_test, Y_test, theta),))

    figure = pl.figure(1)
    ax = figure.add_subplot(211)

    # Living space goes on the x-axis and price on the y-axis so that the
    # data matches the axis labels set below (the arguments used to be
    # swapped, which mislabelled the plot).
    ax.scatter(test_set['living_space'], test_set['price'],
               label='test set')
    ax.scatter(train_set['living_space'], train_set['price'], color='g',
               label='train set')
    # Flatten the prediction matrix to 1-D so it lines up element-wise with
    # the living-space column regardless of its row/column orientation.
    predicted_prices = np.asarray(test_predictions).ravel()
    ax.plot(test_set['living_space'], predicted_prices, color='red',
            label='regression')

    ax.set_xlabel('Living space, sqm')
    ax.set_ylabel('Price, EUR')
    ax.legend()

    ax = figure.add_subplot(212)
    ax.scatter(np.arange(len(costs)), costs)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Cost function')

    pl.show()