示例#1
0
文件: part1.py 项目: krutt/ML
def execute(dataset):
    """Run ``solve`` on ``dataset`` and draw the two resulting plots.

    ``solve`` must return exactly six values. The first four are handed to
    ``plot_roc`` in order; the last two are handed to
    ``common.plot_decision_boundary`` together with a fixed title.

    :param dataset: forwarded unchanged to ``solve``
    """
    results = solve(dataset)
    # Unpacking enforces the six-value contract of ``solve``.
    roc_x, roc_y, heuristic_point, th_point, correct, incorrect = results

    plot_roc(roc_x, roc_y, heuristic_point, th_point)
    common.plot_decision_boundary(correct, incorrect, 'using ideal lambda')
示例#2
0
                dW2 += self.reg_lambda * self.W2
                dW1 += self.reg_lambda * self.W1

                # Gradient descent parameter update
                self.W1 += -self.epsilon * dW1
                self.b1 += -self.epsilon * db1
                self.W2 += -self.epsilon * dW2
                self.b2 += -self.epsilon * db2

                del (data_index[rand_index])

    def predict(self, x):
        """Predict a class index for every row of ``x``.

        Runs the forward pass (tanh hidden layer followed by a softmax output
        layer) using ``self.W1``, ``self.b1``, ``self.W2`` and ``self.b2``.

        Args:
            x (numpy.ndarray): 2-D array with one sample per row.

        Returns:
            numpy.ndarray: 1-D array holding, for each row, the index of the
            output unit with the highest softmax probability.
        """
        # Forward propagation
        hidden = np.tanh(x.dot(self.W1) + self.b1)
        scores = hidden.dot(self.W2) + self.b2
        # Softmax is invariant to a per-row shift. Subtracting the row maximum
        # keeps every exponent <= 0, so np.exp cannot overflow to inf and
        # produce NaN probabilities (which would make argmax return 0).
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return np.argmax(probs, axis=1)


if __name__ == '__main__':
    # Demo: build training data, fit the network, draw its decision boundary.
    from common import gen_train_data, plot_decision_boundary

    train_data, train_label = gen_train_data()
    network = NN(train_data, train_label, 3)

    # Alternative trainer, currently disabled:
    # network.stochastic_gradient_descent()
    network.batch_gradient_descent()

    # The bound method is itself the one-argument callable the plot needs.
    plot_decision_boundary(network.predict, train_data, train_label)
示例#3
0
文件: part2.py 项目: krutt/ML
def logistic_regression(train_x, train_y, test_x, test_y, lr, epochs,
                        quadratic: bool = False):
    """
    Performs logistic regression on training set & reports results for test set.

    Weights are fitted by batch gradient descent on the cross-entropy cost.
    The fitted model is then applied to the test set with a 0.5 decision
    threshold; confusion counts and a weighted error are printed, and the
    correctly / incorrectly classified samples are passed to
    ``common.plot_decision_boundary``. Nothing is returned.

    :param train_x: list of 2D training samples X
    :param train_y: list of training labels Y
        (NOTE(review): the arithmetic below broadcasts it against a
        ``(1, n)`` prediction row, so a 1-D array of length n is assumed)
    :param test_x:  list of 2D testing samples X
    :param test_y: list of testing labels Y
    :param lr: learning rate
    :param epochs: number of iterations to train model
    :param quadratic: whether to use quadratic features of X (defaults to linear)
    """
    # set feature function and matching weight vector (6 weights for the
    # quadratic feature map, 3 for the linear one)
    feature_map = get_quadratic_features if quadratic else get_linear_features
    w = np.zeros((6, 1)) if quadratic else np.zeros((3, 1))
    # size of the training set
    n = len(train_x)

    # The training features do not depend on the weights, so compute them once
    # instead of twice per epoch (assumes the feature function is pure).
    train_features = feature_map(train_x)

    for k in range(epochs):
        y_predicted = sigmoid(w.T @ train_features.T)

        # print cost every 10k epochs; the cost is only needed for this report
        if k % 10000 == 0:
            c = (-1 / n) * np.sum(train_y * np.log(y_predicted) +
                                  (1 - train_y) * np.log(1 - y_predicted))
            print(f'[epoch = {k}] cost =', c)

        # determine gradient w.r.t w
        gradient = (1 / n) * (train_features.T @ (y_predicted - train_y).T)
        # adjust weights
        w = w - lr * gradient

    # now test on test data
    test_scores = sigmoid(w.T @ feature_map(test_x).T).T

    # sizes of class 0 / class 1 within the test labels
    n0 = sum(y == 0 for y in test_y)
    n1 = sum(y == 1 for y in test_y)

    # establish classification rule
    predicted_y = [0 if score < 0.5 else 1 for score in test_scores]

    # assess classification using testing set
    correctly_classified = []
    incorrectly_classified = []

    tp_no = fp_no = fn_no = tn_no = 0

    for predicted, actual, sample in zip(predicted_y, test_y, test_x):
        box = common.LabeledBox(actual, sample)
        if predicted == actual:
            # correctly classified
            if predicted == 1:
                tp_no += 1
            else:
                tn_no += 1
            correctly_classified.append(box)
        else:
            # incorrectly classified
            if predicted == 1:
                fp_no += 1
            else:
                fn_no += 1
            incorrectly_classified.append(box)

    # determine error rates; a class that is absent from the test set cannot
    # contribute errors, so its rate is 0 rather than a division by zero
    fp_prob = fp_no / n0 if n0 else 0.0
    fn_prob = fn_no / n1 if n1 else 0.0

    print(' - true positive count: ', tp_no)
    print(' - false positive count: ', fp_no)
    print(' - false negative count: ', fn_no)

    # determine error, weighting each rate by common.p0 / common.p1
    # (presumably the class priors; confirm against the common module)
    error = fp_prob * common.p0 + fn_prob * common.p1

    print('error: ', error)

    title = f'quadratic logistic [{n}]' if quadratic else f'linear logistic [{n}]'
    common.plot_decision_boundary(correctly_classified, incorrectly_classified,
                                  title)
示例#4
0
                # Add regularization terms (b1 and b2 don't have regularization terms)
                dW2 += self.reg_lambda * self.W2
                dW1 += self.reg_lambda * self.W1

                # Gradient descent parameter update
                self.W1 += -self.epsilon * dW1
                self.b1 += -self.epsilon * db1
                self.W2 += -self.epsilon * dW2
                self.b2 += -self.epsilon * db2

                del(data_index[rand_index])

    def predict(self, x):
        """Return the most probable class index for each row of ``x``.

        Performs the network's forward pass: a tanh hidden layer
        (``self.W1``, ``self.b1``) followed by a softmax output layer
        (``self.W2``, ``self.b2``).

        Args:
            x (numpy.ndarray): 2-D array with one sample per row.

        Returns:
            numpy.ndarray: 1-D array of arg-max class indices, one per row.
        """
        # Forward propagation
        z1 = x.dot(self.W1) + self.b1
        a1 = np.tanh(z1)
        z2 = a1.dot(self.W2) + self.b2
        # Shift each row so its largest logit is 0 before exponentiating.
        # Softmax is unchanged by the shift, but np.exp can no longer overflow
        # to inf and yield NaN probabilities for large logits.
        z2 = z2 - np.max(z2, axis=1, keepdims=True)
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return np.argmax(probs, axis=1)

if __name__ == '__main__':
    # Script entry: train the network on generated data and plot the result.
    from common import gen_train_data, plot_decision_boundary

    samples, targets = gen_train_data()

    classifier = NN(samples, targets, 3)
    # classifier.stochastic_gradient_descent()  # alternative, disabled
    classifier.batch_gradient_descent()

    # Hand the bound predict method straight to the plotting helper.
    plot_decision_boundary(classifier.predict, samples, targets)
        Args:
            data (numpy.ndarray): 训练数据集
            labels (numpy.ndarray): 训练标签
            num_iteration (int): 迭代次数
        """
        for j in xrange(num_iteration):
            data_index = range(self.data_num)
            for i in xrange(self.data_num):
                # 学习速率
                alpha = 0.01
                rand_index = int(random.uniform(0, len(data_index)))
                error = self.label[rand_index] - sigmoid(sum(self.data[rand_index] * self.weights + self.b))
                self.weights += alpha * error * self.data[rand_index]
                self.b += alpha * error
                del(data_index[rand_index])

    def predict(self, predict_data):
        """Classify each sample in ``predict_data`` as 0 or 1.

        A sample ``x`` is labelled 1 when ``sum(self.weights * x + self.b)``
        is positive, otherwise 0. The bias is added to every element before
        summing, exactly as the original expression does.

        Args:
            predict_data: iterable of samples, each compatible with
                ``self.weights`` under element-wise multiplication.

        Returns:
            numpy.ndarray: 1-D array of 0/1 labels, one per sample.
        """
        # Build a real list: on Python 3 ``map`` is lazy, and array() would
        # wrap the iterator as a 0-d object array instead of the labels.
        result = [1 if sum(self.weights * x + self.b) > 0 else 0
                  for x in predict_data]
        return array(result)


if __name__ == '__main__':
    # Demo: fit the logistic model on generated data and plot its boundary.
    from common import gen_train_data, plot_decision_boundary

    train_data, train_label = gen_train_data()
    model = Logistic(train_data, train_label)
    model.train(200)

    # The bound method already has the one-argument shape the plot expects.
    plot_decision_boundary(model.predict, train_data, train_label)
            data (numpy.ndarray): 训练数据集
            labels (numpy.ndarray): 训练标签
            num_iteration (int): 迭代次数
        """
        for j in xrange(num_iteration):
            data_index = range(self.data_num)
            for i in xrange(self.data_num):
                # 学习速率
                alpha = 0.01
                rand_index = int(random.uniform(0, len(data_index)))
                error = self.label[rand_index] - sigmoid(
                    sum(self.data[rand_index] * self.weights + self.b))
                self.weights += alpha * error * self.data[rand_index]
                self.b += alpha * error
                del (data_index[rand_index])

    def predict(self, predict_data):
        """Return a 0/1 label for every sample in ``predict_data``.

        The label is 1 when ``sum(self.weights * x + self.b) > 0`` and 0
        otherwise. The bias is added element-wise before the sum, unchanged
        from the original expression.

        Args:
            predict_data: iterable of samples that can be multiplied
                element-wise with ``self.weights``.

        Returns:
            numpy.ndarray: 1-D array with one 0/1 label per sample.
        """
        labels = []
        for x in predict_data:
            activation = sum(self.weights * x + self.b)
            labels.append(1 if activation > 0 else 0)
        # An eager list (not a lazy Python 3 ``map`` object) is passed to
        # array(), so the result is a proper 1-D label array on Python 2 and 3.
        return array(labels)


if __name__ == '__main__':
    # Script entry: train for 200 iterations, then visualise the classifier.
    from common import gen_train_data, plot_decision_boundary

    samples, targets = gen_train_data()

    classifier = Logistic(samples, targets)
    classifier.train(200)

    # Pass the bound predict method directly as the prediction callable.
    plot_decision_boundary(classifier.predict, samples, targets)