def predict(self, x, y):
    # The weights are stored transposed, so x.dot(W) (rather than W.dot(x))
    # gives the right dimensions; mathematically it is the same product.
    self.h1 = f.sigmoid(x.dot(self.w1))
    self.h2 = f.sigmoid(self.h1.dot(self.w2))
    self.res = f.sigmoid(self.h2.dot(self.w3))
    self.pred_y = np.where(self.res >= 0.5, 1, 0)
    self.acc = (y.shape[0] -
                abs(y - self.pred_y).sum()) / y.shape[0] * 100.0
Example #2
File: c3.py Project: o93/aizero
def predict(network, x):
    W1, W2, W3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, W1) + b1
    z1 = func.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = func.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = func.softmax(a3)

    return y
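The network dict is built elsewhere in c3.py; a minimal sketch of a compatible initializer follows (the layer sizes and random init scheme are assumptions, not the project's actual values):

import numpy as np

def init_network(n_in=2, n_h1=3, n_h2=2, n_out=2):
    # weights and biases keyed exactly as predict() expects
    rng = np.random.default_rng(0)
    network = {
        "W1": rng.standard_normal((n_in, n_h1)) * 0.01, "b1": np.zeros(n_h1),
        "W2": rng.standard_normal((n_h1, n_h2)) * 0.01, "b2": np.zeros(n_h2),
        "W3": rng.standard_normal((n_h2, n_out)) * 0.01, "b3": np.zeros(n_out),
    }
    return network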
Example #3
def predict(w, b, X):
    '''
    Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)

    Returns:
    Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
    '''

    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # Compute vector "A" predicting the probabilities of a cat being present in the picture
    ### START CODE HERE ### (≈ 1 line of code)
    A = sigmoid(np.dot(w.T, X) + b)
    ### END CODE HERE ###

    for i in range(A.shape[1]):
        # Convert probabilities a[0,i] to actual predictions p[0,i]
        ### START CODE HERE ### (≈ 4 lines of code)
        Y_prediction[0, i] = 1 if A[0, i] > 0.5 else 0
        ### END CODE HERE ###

    assert(Y_prediction.shape == (1, m))

    return Y_prediction
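A quick usage sketch with hypothetical inputs, assuming a sigmoid helper is in scope:

import numpy as np

w = np.array([[0.1124579], [0.23106775]])
b = -0.3
X = np.array([[1., -1.1, -3.2], [1.2, 2., 0.1]])
print("predictions = " + str(predict(w, b, X)))  # a (1, 3) array of 0/1 labels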
Example #4
    def fit(self, x, y, learning_rate=0.01, epochs: int = 10, L2=0.1):
        # If y is 1-D, reshape it to (m, 1); shape (m,) is error-prone.
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        # Initialize w (a column vector); each epoch accumulates the
        # gradient, adds the L2 regularization term, and updates w.
        m = len(y)
        np.random.seed(42)
        self.w = np.random.random((x.shape[1], 1))
        y_pred = np.zeros(y.shape)
        for i in range(epochs):
            grad = 0  # reset the accumulators every epoch
            b = 0
            for j in range(m):
                y_pred[j] = sigmoid(np.dot(x[j, :], self.w))
                # per-sample log-loss gradient: (p - y) * x
                temp = (y_pred[j] - y[j]) * x[j, :]
                grad = temp.reshape(-1, 1) + grad
                b = b + y[j] - y_pred[j]
            regular = L2 / m * self.w
            grad = grad / m + regular
            b = b / m
            self.w = self.w - learning_rate * grad
            self.w[0] = b  # the first weight doubles as the bias term
            err = loss(y, y_pred, self.w)
            print(f"loss:{err}, grad:{grad.T}")
        return self.w
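loss is not defined in this snippet; a plausible stand-in with the same signature is a mean binary cross-entropy (w is accepted in case an L2 penalty is wanted):

def loss(y, y_pred, w, eps=1e-12):
    # mean binary cross-entropy; clipping avoids log(0)
    p = np.clip(y_pred, eps, 1 - eps)
    return float(-np.mean(y * np.log(p) + (1 - y) * np.log(1 - p)))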
Example #5
    def predict(self, x):
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, W1) + b1
        z1 = func.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = func.softmax(a2)

        return y
Example #6
    def fit(self, x, y, learning_rate=0.01, epochs: int = 10, L2=0.1):
        # If y is 1-D, reshape it to (m, 1); shape (m,) is error-prone.
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        # Initialize w (a column vector); each epoch computes the clipped
        # predictions, the regularized gradient, and the log-loss, then
        # updates w.
        m = len(y)
        np.random.seed(42)
        self.w = np.random.random((x.shape[1], 1))
        for i in range(epochs):
            target = np.clip(sigmoid(np.dot(x, self.w)), 1e-5, 1 - 1e-5)
            # log-loss gradient: x^T (p - y) / m plus the L2 term
            grad = np.dot(x.T, target - y) / m + self.w * L2
            loss = -(np.dot(y.T, np.log(target)) +
                     np.dot(1 - y.T, np.log(1 - target))) / m
            self.w = self.w - learning_rate * grad
            self.w[0] = np.sum(y - target) / m  # first weight doubles as bias
            print(f"loss:{loss}, grad:{grad.T}")
        return self.w
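Hypothetically, given an instance `model` of the class this method belongs to, a toy run might look like this; note x carries a leading all-ones column since the code reuses w[0] as the bias:

import numpy as np

rng = np.random.default_rng(0)
x = np.hstack([np.ones((100, 1)), rng.normal(size=(100, 2))])  # leading 1s column
true_w = np.array([[0.5], [2.0], [-1.0]])
y = (1.0 / (1.0 + np.exp(-x.dot(true_w))) > 0.5).astype(float)
model.fit(x, y.ravel(), learning_rate=0.1, epochs=20, L2=0.01)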
Example #7
def propagate(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat) of size (1, number of examples)

    Return:
    cost -- negative log-likelihood cost for logistic regression
    dw -- gradient of the loss with respect to w, thus same shape as w
    db -- gradient of the loss with respect to b, thus same shape as b

    Tips:
    - Write your code step by step for the propagation
    """

    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)
    ### START CODE HERE ### (≈ 2 lines of code)
    A = sigmoid(np.dot(w.T, X) + b)  # compute activation
    cost = (- 1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A)))  # compute cost
    ### END CODE HERE ###

    # BACKWARD PROPAGATION (TO FIND GRAD)
    ### START CODE HERE ### (≈ 2 lines of code)
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    ### END CODE HERE ###

    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())

    grads = {"dw": dw,
             "db": db}

    return grads, cost
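A usage sketch with hypothetical inputs, assuming the course's sigmoid helper is in scope:

import numpy as np

w = np.array([[1.], [2.]])
b = 2.
X = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y = np.array([[1, 0, 1]])
grads, cost = propagate(w, b, X, Y)
print(grads["dw"], grads["db"], cost)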
Example #8
    def gradient(self, x, t):
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        grads = {}

        batch_num = x.shape[0]

        a1 = np.dot(x, W1) + b1
        z1 = func.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = func.softmax(a2)

        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = func.sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads
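func.sigmoid_grad is not shown here; it is presumably the derivative of the logistic sigmoid evaluated at the pre-activation, which has the standard form:

def sigmoid_grad(x):
    # derivative of the logistic sigmoid at pre-activation x
    s = 1.0 / (1.0 + np.exp(-x))
    return s * (1.0 - s)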
Example #9
def main():
    # simple arguments check
    if len(sys.argv) != 6:
        print("wrong number of arguments")
        sys.exit(0)

    # parse args
    learning_rate = sys.argv[1]
    num_hidden_units = sys.argv[2]
    num_epoches = sys.argv[3]
    training_set_path = sys.argv[4]
    test_set_path = sys.argv[5]

    # load data
    training_set, test_set = func.load_data(training_set_path, test_set_path)
    # find index of num and category features
    num_feature_index_list, cate_feature_index_list = func.classify_features(training_set, test_set)
    # get training set matrices
    training_set_num_feature_matrix, training_set_cate_feature_matrix, training_set_label_matrix = func.get_matrices(training_set, num_feature_index_list, cate_feature_index_list)
    test_set_num_feature_matrix, test_set_cate_feature_matrix, test_set_label_matrix = func.get_matrices(test_set, num_feature_index_list, cate_feature_index_list)
    # get the total number of training and test instances
    total_num_training = training_set_num_feature_matrix.shape[0]
    total_num_test = test_set_num_feature_matrix.shape[0]
    # fill up the feature status list
    # [num, cate]
    feature_status = [0,0]
    if training_set_num_feature_matrix.size != 0 :
        feature_status[0] = 1
    if training_set_cate_feature_matrix.size != 0 :
        feature_status[1] = 1
    # standardize num features
    normed_training_set_num_feature_matrix = np.zeros((total_num_training,0))
    normed_test_set_num_feature_matrix = np.zeros((total_num_test,0))
    if feature_status[0] == 1:
        normed_training_set_num_feature_matrix, normed_test_set_num_feature_matrix = func.standardize(training_set_num_feature_matrix, test_set_num_feature_matrix)

    # combine the feature matrix, reorder
    combined_index_list = num_feature_index_list + cate_feature_index_list
    sorted_index_list = np.argsort(combined_index_list)
    combined_training_set_feature_matrix = np.hstack((normed_training_set_num_feature_matrix,training_set_cate_feature_matrix))
    combined_test_set_feature_matrix = np.hstack((normed_test_set_num_feature_matrix,test_set_cate_feature_matrix))
    ordered_training_set_feature_matrix = combined_training_set_feature_matrix[:,sorted_index_list]
    ordered_test_set_feature_matrix = combined_test_set_feature_matrix[:,sorted_index_list]

    # one-hot
    num_to_skip = 0
    for idx,original_idx in enumerate(cate_feature_index_list):
        variant_list = training_set["metadata"]["features"][:-1][original_idx][1]

        cur_training_col = training_set_cate_feature_matrix[:,idx]
        cur_test_col = test_set_cate_feature_matrix[:,idx]

        for jdx, variant in enumerate(variant_list):
            cur_training_col[cur_training_col == variant] = jdx
            cur_test_col[cur_test_col == variant] = jdx

        cur_training_col = cur_training_col.astype(int)
        cur_test_col = cur_test_col.astype(int)

        expanded_training_cols = np.zeros((total_num_training,len(variant_list)))
        expanded_training_cols[np.arange(total_num_training),cur_training_col.flatten()] = 1
        expanded_test_cols = np.zeros((total_num_test,len(variant_list)))
        expanded_test_cols[np.arange(total_num_test),cur_test_col.flatten()] = 1

        ordered_training_set_feature_matrix = np.delete(ordered_training_set_feature_matrix,original_idx + num_to_skip,axis=1)
        ordered_training_set_feature_matrix = np.insert(ordered_training_set_feature_matrix,[original_idx + num_to_skip],expanded_training_cols,axis=1)
        ordered_test_set_feature_matrix = np.delete(ordered_test_set_feature_matrix,original_idx + num_to_skip,axis=1)
        ordered_test_set_feature_matrix = np.insert(ordered_test_set_feature_matrix,[original_idx + num_to_skip],expanded_test_cols,axis=1)
        num_to_skip += (len(variant_list) - 1)

    # prepend the bias column
    ordered_training_set_feature_matrix = np.insert(ordered_training_set_feature_matrix,0,1,axis=1).astype(float)
    ordered_test_set_feature_matrix = np.insert(ordered_test_set_feature_matrix,0,1,axis=1).astype(float)
    # initialize weight
    w_i_h = np.random.uniform(low=-0.01, high=0.01, size=(int(num_hidden_units), ordered_training_set_feature_matrix.shape[1]))
    w_h_o = np.random.uniform(low=-0.01, high=0.01, size=(1, int(num_hidden_units) + 1))

    # nn SGD
    class_list = training_set["metadata"]["features"][-1][1]
    for epoch in range(int(num_epoches)):
        num_corr = 0
        num_incorr = 0
        sum_E = 0
        for idx in range(total_num_training):
            # 1-D vector; each index corresponds to a hidden unit
            net_i_h = np.dot(ordered_training_set_feature_matrix[idx,:],np.transpose(w_i_h))
            h = func.sigmoid(net_i_h)
            # adding bias entry
            h_o = np.insert(h,0,1).astype(float)
            net_h_o = np.dot(w_h_o, h_o)
            o = func.sigmoid(net_h_o)
            y = training_set_label_matrix[idx,0]
            if class_list.index(y) == 0:
                y = 0
            else:
                y = 1
            E = -y * np.log(o) - (1 - y) * np.log(1 - o)
            sum_E += E
            d_o = y - o
            d_h = h_o*(1 - h_o)*d_o*w_h_o
            update_h_o = float(learning_rate)*d_o*h_o
            update_i_h = float(learning_rate)*d_h[:,1]*ordered_training_set_feature_matrix[idx,:]
            for curcol in range(2,d_h.shape[1]):
                temp = float(learning_rate)*d_h[:,curcol]*ordered_training_set_feature_matrix[idx,:]
                update_i_h = np.vstack((update_i_h,temp))
            w_i_h += update_i_h
            w_h_o += update_h_o

            pred = 0
            if o > 0.5:
                pred = 1
            else:
                pred = 0

            if pred == y:
                num_corr +=1
            else:
                num_incorr +=1

        print(str(epoch+1)+ " {:.12f}".format(sum_E[0])+ " " + str(num_corr) + " " + str(num_incorr))

    # prediction on test set
    num_corr = 0
    num_incorr = 0
    # true positive
    tp = 0
    # predicted positive
    pp = 0
    for idx in range(total_num_test):
        # 1-D vector; each index corresponds to a hidden unit
        net_i_h = np.dot(ordered_test_set_feature_matrix[idx,:],np.transpose(w_i_h))
        h = func.sigmoid(net_i_h)
        # adding bias entry
        h_o = np.insert(h,0,1).astype(float)
        net_h_o = np.dot(w_h_o, h_o)
        o = func.sigmoid(net_h_o)
        y = test_set_label_matrix[idx,0]
        if class_list.index(y) == 0:
            y = 0
        else:
            y = 1

        pred = 0
        if o > 0.5:
            pred = 1
            pp += 1
        else:
            pred = 0

        if pred == y:
            num_corr +=1
            if pred == 1:
                tp += 1
        else:
            num_incorr +=1
        print("{:.12f} ".format(o[0]) + str(pred) + " " + str(y))
    print(str(num_corr) + " " + str(num_incorr))
    actual_pos = np.sum(test_set_label_matrix == class_list[1])
    recall = tp/actual_pos
    precision = tp/pp
    F1 = 2*precision*recall/(precision + recall)
    print("{:.12f}".format(F1))
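The delete/insert dance used for the one-hot expansion above can be expressed more compactly; a sketch, assuming an integer-encoded categorical column:

def one_hot(col, n_values):
    # col: 1-D array of integer category indices -> (len(col), n_values) matrix
    out = np.zeros((col.shape[0], n_values))
    out[np.arange(col.shape[0]), col] = 1
    return out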
Example #10
        currData = np.append(currData, trainDataArray[d, :])
        currData = currData[:, None]
        hiddenLayerInput = np.dot(devWeight1Array_GD, currData)
        hiddenLayerOutput = vectSigmoid(hiddenLayerInput)
        hiddenLayerOutput = np.insert(hiddenLayerOutput, 0, 1)
        hiddenLayerOutput = hiddenLayerOutput[:, None]
        finalLayerInput = np.dot(devWeight2Array_GD, hiddenLayerOutput)
        finalLayerOutput = sigmoid(finalLayerInput)

        # calculate the error signal of the output neuron
        errorOutputNeuron = -finalLayerOutput * (1 - finalLayerOutput) * \
            (trainKeyArray[d] - finalLayerOutput)

        errorHiddenLayer = np.zeros([1, 4])
        for i in range(4):
            if i == 0:
                # index 0 is the bias entry; it receives no error signal
                continue
            else:
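sigmoid and vectSigmoid are not defined in this excerpt; a plausible pair, assuming an elementwise logistic activation:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

vectSigmoid = np.vectorize(sigmoid)  # numpy broadcasting makes this optional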
Example #11
def main():
    # simple arguments check
    if len(sys.argv) != 5:
        print("wrong number of arguments")
        sys.exit(0)

    # parse args
    learning_rate = sys.argv[1]
    max_epoch = sys.argv[2]
    training_set_path = sys.argv[3]
    test_set_path = sys.argv[4]

    # load data
    training_set, test_set = func.load_data(training_set_path, test_set_path)
    # find index of num and category features
    num_feature_index_list, cate_feature_index_list = func.classify_features(
        training_set, test_set)
    # get training set matrices
    training_set_num_feature_matrix, training_set_cate_feature_matrix, training_set_label_matrix = func.get_matrices(
        training_set, num_feature_index_list, cate_feature_index_list)
    test_set_num_feature_matrix, test_set_cate_feature_matrix, test_set_label_matrix = func.get_matrices(
        test_set, num_feature_index_list, cate_feature_index_list)
    # get the total number of training and test instances
    total_num_training = training_set_num_feature_matrix.shape[0]
    total_num_test = test_set_num_feature_matrix.shape[0]
    # fill up the feature status list
    # [num, cate]
    feature_status = [0, 0]
    if training_set_num_feature_matrix.size != 0:
        feature_status[0] = 1
    if training_set_cate_feature_matrix.size != 0:
        feature_status[1] = 1
    # standardize num features
    normed_training_set_num_feature_matrix = np.zeros((total_num_training, 0))
    normed_test_set_num_feature_matrix = np.zeros((total_num_test, 0))
    if feature_status[0] == 1:
        normed_training_set_num_feature_matrix, normed_test_set_num_feature_matrix = func.standardize(
            training_set_num_feature_matrix, test_set_num_feature_matrix)

    # combine the feature matrix, reorder
    combined_index_list = num_feature_index_list + cate_feature_index_list
    sorted_index_list = np.argsort(combined_index_list)
    combined_training_set_feature_matrix = np.hstack(
        (normed_training_set_num_feature_matrix,
         training_set_cate_feature_matrix))
    combined_test_set_feature_matrix = np.hstack(
        (normed_test_set_num_feature_matrix, test_set_cate_feature_matrix))
    ordered_training_set_feature_matrix = combined_training_set_feature_matrix[:,
                                                                               sorted_index_list]
    ordered_test_set_feature_matrix = combined_test_set_feature_matrix[:,
                                                                       sorted_index_list]

    # one-hot
    num_to_skip = 0
    for idx, original_idx in enumerate(cate_feature_index_list):
        variant_list = training_set["metadata"]["features"][:-1][original_idx][
            1]

        cur_training_col = training_set_cate_feature_matrix[:, idx]
        cur_test_col = test_set_cate_feature_matrix[:, idx]

        for jdx, variant in enumerate(variant_list):
            cur_training_col[cur_training_col == variant] = jdx
            cur_test_col[cur_test_col == variant] = jdx

        cur_training_col = cur_training_col.astype(int)
        cur_test_col = cur_test_col.astype(int)

        expanded_training_cols = np.zeros(
            (total_num_training, len(variant_list)))
        expanded_training_cols[np.arange(total_num_training),
                               cur_training_col.flatten()] = 1
        expanded_test_cols = np.zeros((total_num_test, len(variant_list)))
        expanded_test_cols[np.arange(total_num_test),
                           cur_test_col.flatten()] = 1

        ordered_training_set_feature_matrix = np.delete(
            ordered_training_set_feature_matrix,
            original_idx + num_to_skip,
            axis=1)
        ordered_training_set_feature_matrix = np.insert(
            ordered_training_set_feature_matrix, [original_idx + num_to_skip],
            expanded_training_cols,
            axis=1)
        ordered_test_set_feature_matrix = np.delete(
            ordered_test_set_feature_matrix,
            original_idx + num_to_skip,
            axis=1)
        ordered_test_set_feature_matrix = np.insert(
            ordered_test_set_feature_matrix, [original_idx + num_to_skip],
            expanded_test_cols,
            axis=1)
        num_to_skip += (len(variant_list) - 1)

    # prepend the bias column
    ordered_training_set_feature_matrix = np.insert(
        ordered_training_set_feature_matrix, 0, 1, axis=1).astype(float)
    ordered_test_set_feature_matrix = np.insert(
        ordered_test_set_feature_matrix, 0, 1, axis=1).astype(float)

    # SGD
    F1_training = []
    F1_test = []
    class_list = training_set["metadata"]["features"][-1][1]
    for num_epoches in range(1, int(max_epoch) + 1):
        # initialize weight
        w = np.random.uniform(
            low=-0.01,
            high=0.01,
            size=(1, ordered_training_set_feature_matrix.shape[1]))
        for epoch in range(num_epoches):
            for idx in range(total_num_training):
                net = np.dot(w, ordered_training_set_feature_matrix[idx, :])
                o = func.sigmoid(net)
                y = training_set_label_matrix[idx, 0]
                if class_list.index(y) == 0:
                    y = 0
                else:
                    y = 1
                E = -y * np.log(o) - (1 - y) * np.log(1 - o)
                grad = (o - y) * ordered_training_set_feature_matrix[idx, :]
                update = -float(learning_rate) * grad
                w += update

        # prediction on test set
        num_corr = 0
        num_incorr = 0
        # true positive
        tp = 0
        # predicted positive
        pp = 0
        for idx in range(total_num_test):
            net = np.dot(w, ordered_test_set_feature_matrix[idx, :])
            o = func.sigmoid(net)
            y = test_set_label_matrix[idx, 0]
            if class_list.index(y) == 0:
                y = 0
            else:
                y = 1

            pred = 0
            if o > 0.5:
                pred = 1
                pp += 1
            else:
                pred = 0

            if pred == y:
                num_corr += 1
                if pred == 1:
                    tp += 1
            else:
                num_incorr += 1
        actual_pos = np.sum(test_set_label_matrix == class_list[1])
        recall = tp / actual_pos
        precision = tp / pp
        F1 = 2 * precision * recall / (precision + recall)
        F1_test.append(F1)

        # prediction on training set
        num_corr = 0
        num_incorr = 0
        # true positive
        tp = 0
        # predicted positive
        pp = 0
        for idx in range(total_num_training):
            net = np.dot(w, ordered_training_set_feature_matrix[idx, :])
            o = func.sigmoid(net)
            y = training_set_label_matrix[idx, 0]
            if class_list.index(y) == 0:
                y = 0
            else:
                y = 1

            pred = 0
            if o > 0.5:
                pred = 1
                pp += 1
            else:
                pred = 0

            if pred == y:
                num_corr += 1
                if pred == 1:
                    tp += 1
            else:
                num_incorr += 1
        actual_pos = np.sum(training_set_label_matrix == class_list[1])
        recall = tp / actual_pos
        precision = tp / pp
        F1 = 2 * precision * recall / (precision + recall)
        F1_training.append(F1)

    plt.plot(range(1,
                   int(max_epoch) + 1),
             F1_training,
             label="on training set")
    plt.plot(range(1, int(max_epoch) + 1), F1_test, label="on test set")
    plt.title("F1 vs #epochs on heart dataset, learning rate = 0.05")
    plt.ylabel("F1")
    plt.xlabel("#epochs")
    plt.legend()
    plt.show()
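The test-set and training-set evaluation loops above compute identical statistics; a small helper like the following (hypothetical, not in the original) would remove the duplication:

def f1_from_counts(tp, pp, actual_pos):
    # tp: true positives, pp: predicted positives, actual_pos: real positives
    recall = tp / actual_pos
    precision = tp / pp
    return 2 * precision * recall / (precision + recall)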
Example #12
    input_value = [age[iteration], cr_amnt[iteration], duration[iteration]]
    current_in.append(input_value)
    current_out.append(risk[iteration])

    # then build numpy arrays, which are easier to work with here
    train_in = np.array(current_in)
    train_out = np.array(current_out).T

    # the learning loop
    for itera in range(cluster_training_iterations[clusters[iteration] - 1]):
        input_layer = train_in  # the input layer is the array built above
        # predict the output with the sigmoid activation
        output_layer = sigmoid(np.dot(input_layer, weights[clusters[iteration] - 1]))
        error = train_out - output_layer  # compute the error
        # compute the adjustment using the sigmoid derivative
        adj = error * sig_der(output_layer)
        # update the weights
        weights[clusters[iteration] - 1] += np.dot(input_layer.T, adj)

    # after each learning pass, count this set toward its cluster's completed total
    clusters_done[clusters[iteration] - 1] += 1

    # limit per-cluster training iterations to prevent overtraining
    if clusters_done[clusters[iteration] - 1] % int(
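sig_der is applied to the already-activated output, so it is presumably the sigmoid derivative expressed in terms of the output value:

def sig_der(s):
    # derivative of the sigmoid written in terms of its output s = sigmoid(z)
    return s * (1.0 - s)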
Example #13
def main():
    ## Load Data
    # The first two columns contains the exam scores and the third column
    # contains the label.

    data = np.loadtxt("./ex2data1.txt", delimiter=',')
    X = data[:, :2]
    y = data[:, 2]

    ## Part 1: Plotting
    print("""Plotting data with + indicating (y = 1) examples and o 
            indicating (y = 0) examples.""")
    plotData(X, y)
    plt.xlabel("Exam 1 score")
    plt.ylabel("Exam 2 score")
    plt.legend(["Admitted", "Not admitted"])

    ## Part 2: Compute Cost and Gradient

    # Setup the data matrix appropriately, and add ones for the intercept term
    m, n = X.shape

    # Add intercept term to x and X_test
    X = np.hstack([np.ones((m, 1)), X])

    # Initialize fitting parameters
    initial_theta = np.zeros((n + 1, 1))

    # Compute and display initial cost and gradient
    cost = costFunction(initial_theta, X, y)
    grad = gradient(initial_theta, X, y)

    print("\nCost at initial theta (zeros): {}".format(cost))
    print("Expected cost (approx): 0.693")
    print("Gradient at initial theta (zeros):\n{}".format(grad))
    print("Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628")

    # Compute and display cost and gradient with non-zero theta
    test_theta = np.array([-24, 0.2, 0.2])
    cost = costFunction(test_theta, X, y)
    grad = gradient(test_theta, X, y)

    print("\nCost at test theta: {}".format(cost))
    print("Expected cost (approx): 0.218")
    print("Gradient at test theta:\n{}".format(grad))
    print("Expected gradients (approx):\n 0.043\n 2.566\n 2.647")

    ## Part 3: Optimizing using fminunc
    options = {"maxiter": 400}
    ## Two implementations here
    # theta, cost = scipy_fminunc(costFunction, initial_theta, (X, y), options)
    theta, cost = tf_fmin(X, y, initial_theta)

    # Print theta to screen
    print("Cost at theta found by fminunc: {}".format(cost))
    print("Expected cost (approx): 0.203")
    print("theta: {}".format(theta))
    print("Expected theta (approx): \n-25.161\n 0.206\n 0.201")

    # Plot Boundary
    plotDecisionBoundary(theta, X, y)

    # Put some labels
    plt.xlabel("Exam 1 score")
    plt.ylabel("Exam 2 score")

    # Legend, specific for the exercise
    plt.legend(["Admitted", "Not admitted", "Decision Boundary"])
    plt.axis([30, 100, 30, 100])

    ## Part 4: Predict and Accuracies
    prob = sigmoid(np.array([1, 45, 85]) @ theta)
    print(
        "For a student with scores 45 and 85, we predict an admission probability of {}"
        .format(prob))
    print("Expected value: 0.775 +/- 0.002")

    # Compute accuracy on our training set
    p = predict(theta, X)

    print("Train Accuracy: {}".format(
        np.mean(np.float64(p == y.reshape(-1, 1))) * 100))
    print("Expected accuracy (approx): 89.0")

    plt.show()
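predict is imported from the exercise helpers and not shown here; a minimal version consistent with how it is used above (sigmoid assumed in scope):

import numpy as np

def predict(theta, X):
    # returns an (m, 1) column of 0/1 admission predictions
    return (sigmoid(X @ theta) >= 0.5).astype(np.float64).reshape(-1, 1)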