def main():
    """Fit a two-feature linear model on 'data2.txt' and price one house.

    Steps performed:
      1. Load the normalized data plus the mean/std statistics returned by
         ``load_data.load_and_normalize``.
      2. Run ``gradient_descent`` and print the three resulting coefficients.
      3. Plot the per-iteration values returned by ``gradient_descent``.
      4. Normalize a sample house (size 1650, 3 bedrooms) with the feature
         statistics, evaluate the model, and map the result back using
         ``Y_std[0]`` and ``Y_mean[0]``.
    """
    (X_normalize, Y_normalize, Origin_X_normalize, Y_mean, Y_std,
     Origin_X_mean, Origin_X_std) = load_data.load_and_normalize('data2.txt')

    learning_rate = 0.024
    step_count = 800
    theta, j_list = gradient_descent(X_normalize, Y_normalize, learning_rate,
                                     step_count)

    intercept = theta[0, 0]
    size_weight = theta[1, 0]
    bedroom_weight = theta[2, 0]
    print('The linear regression formula is {0} + {1} * x1 + {2} * x2'.format(
        intercept, size_weight, bedroom_weight))

    # presumably j_list holds one cost value per iteration -- confirm against
    # gradient_descent.
    loss_plot_options = {'fmt': 'b-', 'title': 'Lost', 'show': True}
    plot_data.plot(range(step_count), j_list, 'iteration', "lost",
                   loss_plot_options)

    house_size = 1650
    br_num = 3

    # Apply the same per-feature scaling that was applied to the training set.
    scaled_size = (house_size - Origin_X_mean[0]) / Origin_X_std[0]
    scaled_bedrooms = (br_num - Origin_X_mean[1]) / Origin_X_std[1]

    # Leading 1 is the intercept term.
    sample = np.array([1, scaled_size, scaled_bedrooms])
    scaled_price = sample.dot(theta).sum()

    # Undo the target normalization to report the price in original units.
    price = scaled_price * Y_std[0] + Y_mean[0]
    print('price is {0}'.format(price))
예제 #2
0
def plot(X, show=False):
    """Plot column 0 of ``X`` against column 1 as black 'x' markers.

    Args:
        X: 2-D indexable array; columns 0 and 1 are used as coordinates.
        show: Forwarded to ``plot_data.plot`` as the ``'show'`` option.
    """
    x1 = X[:, 0]
    x2 = X[:, 1]
    options = {'markersize': 5, 'fmt': 'kx', 'show': show}
    plot_data.plot(x1, x2, 'x1', 'x2', options)
예제 #3
0
def plot(X):
    """Plot column 0 of ``X`` against column 1 as blue 'x' markers.

    Args:
        X: 2-D indexable array; columns 0 and 1 are used as coordinates.

    The ``'show'`` option passed to ``plot_data.plot`` is always True.
    """
    x1 = X[:, 0]
    x2 = X[:, 1]
    options = {'markersize': 4, 'fmt': 'bx', 'show': True}
    plot_data.plot(x1, x2, 'x1', 'x2', options)
예제 #4
0
def main():
    """Load 'data1.txt' and plot its first column against its second."""
    data = load_data.load('data1.txt')
    house_sizes = data[:, 0]
    house_prices = data[:, 1]

    options = {'show': True, 'title': 'Original Data', 'fmt': 'rx'}
    plot_data.plot(house_sizes, house_prices, 'house size', 'house price',
                   options)
예제 #5
0
def plot(X, idx_arr, centroids):
    """Plot every cluster's points in its own color, plus each centroid.

    Args:
        X: 2-D array of points; columns 0 and 1 are the plotted coordinates.
        idx_arr: Array of cluster indices compared against each cluster
            number (presumably 1-D with one entry per row of ``X`` --
            confirm against the caller).
        centroids: 2-D array with one centroid per row; its row count
            defines the number of clusters.

    The color palette has four entries and is cycled, so more than four
    clusters no longer raise ``IndexError`` (colors repeat instead).
    """
    colors = ['r', 'y', 'g', 'b']
    shapes = ['x', 'x', 'x', 'x']
    cluster_count = centroids.shape[0]
    for idx in range(cluster_count):
        # Rows of X assigned to this cluster.
        row_idx_arr = np.argwhere(idx_arr == idx).ravel()
        X_idx = X[row_idx_arr]

        # Wrap around the palette so any number of clusters is supported.
        point_fmt = colors[idx % len(colors)] + shapes[idx % len(shapes)]
        plot_data.plot(X_idx[:, 0], X_idx[:, 1], 'x1', 'x2', {
            'fmt': point_fmt,
            'show': False
        })

        # Centroid marker; only the final call requests showing the figure.
        plot_data.plot(centroids[idx, 0], centroids[idx, 1], 'x1', 'x2', {
            'fmt': 'k+',
            'show': idx == cluster_count - 1
        })
예제 #6
0
def main():
    """Scatter-plot the rows of 'data1.txt', split by the value in column 2.

    Rows whose third column equals 1 are drawn as blue 'x' markers, rows
    whose third column equals 0 as yellow circles.
    """
    data = load_data.load('data1.txt')
    labels = data[:, 2]

    positives = data[labels == 1]
    negatives = data[labels == 0]

    positive_options = {'fmt': 'bx', 'markersize': 5}
    plot_data.plot(positives[:, 0], positives[:, 1], 'score1', 'score2',
                   positive_options)

    # Only this second call carries 'show': True.
    negative_options = {'fmt': 'yo', 'markersize': 5, 'show': True}
    plot_data.plot(negatives[:, 0], negatives[:, 1], 'score1', 'score2',
                   negative_options)
def main():
    """Compare a ``minimize``-fitted boundary with a gradient-descent one.

    Fits ``theta`` with ``minimize`` (using ``cost_function`` and
    ``gradient``), scatter-plots the two classes from 'data1.txt', and then
    overlays two lines: one computed from the fitted ``theta`` and one from a
    hard-coded coefficient vector. Finally prints ``predict(45, 85, ...)``
    for the hard-coded coefficients.
    """
    X, y, origin_X = load_data.load_and_process('data1.txt')
    initial_theta = np.zeros(X.shape[1])
    res = minimize(cost_function, initial_theta, args=(X, y), method=None,
                   jac=gradient, options={'maxiter': 400})
    print(res)

    theta = res.x

    data = load_data.load('data1.txt')

    pos_values = data[(data[:, 2] == 1)]
    neg_values = data[(data[:, 2] == 0)]

    plot_data.plot(pos_values[:, 0], pos_values[:, 1], 'score1', 'score2',
                   {
                       'fmt': 'bx',
                       'markersize': 5
                   })

    plot_data.plot(neg_values[:, 0], neg_values[:, 1], 'score1', 'score2',
                   {
                       'fmt': 'yo',
                       'markersize': 5,
                       'show': False
                   })

    score1 = np.linspace(25, 100)

    # NOTE(review): the line is solved from theta0 + theta1*x1 + theta2*x2
    # = 0.5. If the model is a sigmoid classifier thresholded at probability
    # 0.5, the boundary would normally use 0 here -- confirm intent.
    score2 = ((0.5 - theta[0]) - theta[1] * score1) / theta[2]

    plot_data.plot(score1, score2, 'score1', 'score2',
                   {
                       'fmt': 'r-',
                       'label': 'minimize',
                       'show': False
                   })

    # Hard-coded coefficients, originally annotated "0.003 100 0000"
    # (presumably learning rate and iteration count -- confirm).
    # Earlier recorded results, kept for reference only:
    #   [-4.81180027, 0.04528064, 0.03819149]    # 0.001 10 0000
    #   [-15.39517866, 0.12825989, 0.12247929]   # 0.001 100 0000
    theta_from_cal_3 = np.array([-22.21628108, 0.18268725, 0.17763448])

    score3 = ((0.5 - theta_from_cal_3[0])
              - theta_from_cal_3[1] * score1) / theta_from_cal_3[2]
    plot_data.plot(score1, score3, 'score1', 'score2',
                   {
                       'fmt': 'g-',
                       'label': 'gredient-descent',
                       'legend_loc': 'upper right',
                       'show': True
                   })

    print(predict(45, 85, theta_from_cal_3))
예제 #8
0
def main():
    """Plot ``sigmoid.sigmoid`` over [-8, 8] with a horizontal line at 0.5."""
    xs = np.linspace(-8, 8, 1000)
    curve = sigmoid.sigmoid(xs)
    curve_options = {
        'fmt': 'b-',
        'title': 'sigmoid',
        'label': 'sigmoid',
        'show': False
    }
    plot_data.plot(xs, curve, 'x', 'y', curve_options)

    # Constant reference line: one 0.5 per sample in xs.
    half_line = np.full(len(xs), 0.5)
    half_options = {'fmt': 'g-', 'label': 'y = 0.5', 'show': True}
    plot_data.plot(xs, half_line, 'x', 'y', half_options)
def main():
    """Fit a linear regression on 'data1.txt' and plot the fit and the cost.

    Runs ``gradient_descent`` for 400 iterations, prints the resulting
    ``theta``, draws the original data together with the fitted values
    ``X.dot(theta)``, and then plots the values in ``j_list`` against the
    iteration number.
    """
    X, Y, Origin_X = load_data.load_and_process('data1.txt')
    alpha = 0.023
    iterations = 400
    theta, j_list = gradient_descent(X, Y, alpha, iterations)
    print(theta)

    # Axis labels match the overlay below. The original passed
    # 'original data figure' / 'house size' in the x/y label positions,
    # which mislabeled both axes.
    plot_data.plot(Origin_X, Y, 'house size', 'price', {
        'fmt': 'rx',
        'label': 'Original Data'
    })
    plot_data.plot(
        Origin_X, X.dot(theta), "house size", 'price', {
            'fmt': 'b-',
            'title': 'gradient decent',
            'label': 'Linear Regression',
            'legend_loc': 'lower right',
            'show': True
        })

    iteration_array = range(iterations)
    plot_data.plot(iteration_array, j_list, 'iteration', "lost", {
        'fmt': 'b-',
        'title': 'Lost',
        'show': True
    })
예제 #10
0
def main():
    """Run gradient descent on 'data1.txt' and plot the resulting line.

    Prints ``theta``, plots the values in ``j_list`` against the iteration
    number, scatter-plots the rows of 'data1.txt' split by column 2, and
    overlays the line solved from ``theta`` over score1 in [25, 100].
    """
    x, y, origin_x = load_data.load_and_process('data1.txt')
    learning_rate = 0.003
    step_count = 1000000
    theta, j_list = gradient_descent(x, y, learning_rate, step_count)
    print(theta)

    loss_options = {'fmt': 'b-', 'title': 'Lost', 'show': True}
    plot_data.plot(range(step_count), j_list, 'iteration', "lost",
                   loss_options)

    data = load_data.load('data1.txt')
    labels = data[:, 2]
    positives = data[labels == 1]
    negatives = data[labels == 0]

    plot_data.plot(positives[:, 0], positives[:, 1], 'score1', 'score2',
                   {'fmt': 'bx', 'markersize': 5})
    plot_data.plot(negatives[:, 0], negatives[:, 1], 'score1', 'score2',
                   {'fmt': 'yo', 'markersize': 5, 'show': False})

    # Solve theta0 + theta1*score1 + theta2*score2 = 0.5 for score2,
    # elementwise over the whole score1 grid.
    score1 = np.linspace(25, 100)
    bias, w1, w2 = theta[0, 0], theta[1, 0], theta[2, 0]
    score2 = ((0.5 - bias) - w1 * score1) / w2

    plot_data.plot(score1, score2, 'score1', 'score2',
                   {'fmt': 'r-', 'show': True})
예제 #11
0
def plot(X, show=False):
    """Plot column 0 of ``X`` against column 1 with default formatting.

    Args:
        X: 2-D indexable array; columns 0 and 1 are used as coordinates.
        show: Forwarded to ``plot_data.plot`` as the ``'show'`` option.
    """
    x1, x2 = X[:, 0], X[:, 1]
    options = {'show': show}
    plot_data.plot(x1, x2, 'x1', 'x2', options)