def main():
    """Compare normal-equation and gradient-descent price predictions.

    Builds a design matrix (a column of ones, number of rooms, living
    space) from the training split, fits one parameter vector with
    ``normal_equation.learn`` and one with ``gradient_descent.learn``, and
    then prints, for every row of the test split, the actual price next to
    both predicted prices and their percentage of the actual price.
    """
    train_set, test_set = split_data()

    # One row per feature; transposing yields one row per training sample.
    feature_rows = (
        np.ones(train_set.shape[0]),
        train_set['number_of_rooms'],
        train_set['living_space'],
    )
    X = np.matrix(feature_rows).T
    Y = np.matrix(train_set['price']).T

    ne_theta = normal_equation.learn(X, Y)
    # NOTE(review): 0.00015 and 100 are presumably the learning rate and the
    # iteration count -- confirm against gradient_descent.learn.
    gd_theta = gradient_descent.learn(X, Y, 0.00015, 100)

    columns = ['number_of_rooms', 'living_space', 'price']
    for rooms, area, price in test_set[columns].values:
        ne_price = int(round(predict(rooms, area, ne_theta)))
        gd_price = int(round(predict(rooms, area, gd_theta)))

        print('Number of rooms %s, area %s sqm:' % (rooms, area))
        print('actual price: %s EUR' % price)

        # Predicted price as a whole percentage of the actual price.
        ne_percent = int(100. * ne_price / price)
        print('ne predict: %s EUR (%s%%)' % (ne_price, ne_percent))
        gd_percent = int(100. * gd_price / price)
        print('gd predict: %s EUR (%s%%)' % (gd_price, gd_percent))
def main():
    """Fit a price model by gradient descent, print metrics, and plot it.

    Uses the train/test split returned by ``split_data(district='Steglitz')``.
    The design matrices hold a column of ones, the number of rooms, and the
    living space; the target is the price. After training, the function
    prints the values returned by ``calc_error`` and ``r_squared`` for both
    splits and shows a figure with two subplots:

    * price against living space for the train set, the test set, and the
      model's predictions on the test set;
    * the values returned by ``calc_cost_functions`` against their index.
    """
    # split_data() without arguments is the alternative to this
    # single-district split.
    train_set, test_set = split_data(district='Steglitz')

    # One row per feature; transposing yields one row per sample.
    X_train = np.matrix([
        np.ones(train_set.shape[0]),
        train_set['number_of_rooms'],
        train_set['living_space'],
    ]).T
    Y_train = np.matrix(train_set['price']).T

    alpha = 0.00015
    n_iterations = 50
    # NOTE(review): the trailing True presumably makes learn() return the
    # parameters after every iteration -- confirm in gradient_descent. The
    # last entry is used as the fitted model.
    thetas = gradient_descent.learn(
        X_train, Y_train, alpha, n_iterations, True)
    theta = thetas[-1]

    X_test = np.matrix([
        np.ones(test_set.shape[0]),
        test_set['number_of_rooms'],
        test_set['living_space'],
    ]).T
    Y_test = np.matrix(test_set['price']).T

    test_predictions = np.dot(X_test, theta)
    costs = calc_cost_functions(X_train, Y_train, thetas)

    # Each metric is computed right before it is printed, one per line, as
    # "<label> <value>". A single-argument print() call produces the same
    # text under Python 2 and Python 3.
    reports = (
        ('Train error', calc_error, X_train, Y_train),
        ('Test error', calc_error, X_test, Y_test),
        ('Train R squared', r_squared, X_train, Y_train),
        ('Test R squared', r_squared, X_test, Y_test),
    )
    for label, metric, features, targets in reports:
        print('%s %s' % (label, metric(features, targets, theta)))

    figure = pl.figure(1)

    # Living space goes on the x axis and price on the y axis so that the
    # plotted data agrees with the axis labels set below.
    ax = figure.add_subplot(211)
    ax.scatter(test_set['living_space'], test_set['price'],
               label='test set')
    ax.scatter(train_set['living_space'], train_set['price'],
               color='g', label='train set')

    # Sort by living space so the connected line runs left to right instead
    # of jumping between samples in row order. The model also depends on the
    # number of rooms, so the curve need not be perfectly straight.
    test_space = np.asarray(test_set['living_space'])
    predicted_prices = np.asarray(test_predictions).ravel()
    order = np.argsort(test_space)
    ax.plot(test_space[order], predicted_prices[order],
            color='red', label='regression')
    ax.set_xlabel('Living space, sqm')
    ax.set_ylabel('Price, EUR')
    ax.legend()

    ax = figure.add_subplot(212)
    ax.scatter(np.arange(len(costs)), costs)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Cost function')

    pl.show()