Example #1
def calculate_accurracy(root, noOfAcids, kMers, train_file, test_file, laplace_alpha, train_end_index = -1):
    csv_path = os.path.join(root, test_file)
    test_x, test_y = bs._load_dataset(csv_path)
    res, _, _ = bs.result_bayes(root, train_file, test_x, kMers, noOfAcids, laplace_alpha, train_end_index)
    # Indices of test samples whose true label is cleavable
    trueIndices = np.where(np.array(test_y) == 1)
    # Indices of test samples whose true label is non-cleavable
    falseIndices = np.where(np.array(test_y) == 0)
    # Accuracy: correctly predicted cleavable plus correctly predicted non-cleavable, over the test size
    accuracy = (np.sum(res[0, trueIndices]) +
                (np.size(falseIndices) - np.sum(res[0, falseIndices]))) / len(test_x)
    return accuracy
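These snippets omit their imports: they assume `import os`, `import numpy as np` (and, for the plotting examples, `import matplotlib.pyplot as plt` and `from matplotlib import animation`), plus a project-specific helper module imported as `bs` that provides `_load_dataset`, `result_bayes` and `_bayes`. Below is a hypothetical call to the function above, reusing the dataset path, file names and k-mer/acid counts that appear in the other examples; the `laplace_alpha` value is made up for illustration.

# Sketch of a call to calculate_accurracy; parameter values are assumptions taken from the other examples
acc = calculate_accurracy(root='./Dataset',
                          noOfAcids=20,
                          kMers=8,
                          train_file='q2_train_set.txt',
                          test_file='q2_test_set.txt',
                          laplace_alpha=1.0)  # smoothing value chosen for illustration
print("Accuracy:", acc)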
Example #2
def part1(root='./Dataset',
          trainfile='q2_train_set.txt',
          testfile='q2_test_set.txt'):
    #Load Datasets
    noOfMers = 8
    noOfAcids = 20
    csv_path = os.path.join(root, trainfile)
    train_x, train_y = bs._load_dataset(csv_path)
    csv_path = os.path.join(root, testfile)
    test_x, test_y = bs._load_dataset(csv_path)
    # Train on the training file and classify the test set
    myRes, _, _ = bs.result_bayes(root, trainfile, test_x, noOfMers, noOfAcids)
    # Indices of test samples whose true label is cleavable
    trueIndices = np.where(np.array(test_y) == 1)
    # Indices of test samples whose true label is non-cleavable
    falseIndices = np.where(np.array(test_y) == 0)
    # Report per-class and overall accuracy
    print("Real cleavable number:  \t",
          np.size(trueIndices), "\t Number predicted true cleavable:\t",
          np.sum(myRes[0, trueIndices]), "\t Accuracy:\t",
          np.sum(myRes[0, trueIndices]) / np.size(trueIndices))
    print("Real nonCleavable number:\t", np.size(falseIndices),
          "\t Number predicted true nonCleavable:\t",
          np.size(falseIndices) - np.sum(myRes[0, falseIndices]),
          "\t Accuracy:\t",
          (np.size(falseIndices) - np.sum(myRes[0, falseIndices])) /
          np.size(falseIndices))
    print(
        "Total test size:\t\t", len(test_x),
        "\t Number predicted true in total:\t",
        np.sum(myRes[0, trueIndices]) +
        (np.size(falseIndices) - np.sum(myRes[0, falseIndices])),
        "\t Accuracy:\t",
        ((np.sum(myRes[0, trueIndices]) +
          (np.size(falseIndices) - np.sum(myRes[0, falseIndices]))) /
         len(test_x)))
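Since `myRes[0]` is used above as a 0/1 prediction vector aligned with `test_y`, the overall accuracy in the last print can be written as a single comparison. A minimal sketch, under that assumption:

# Overall accuracy as the fraction of matching predictions (assumes 0/1 vectors of equal length)
predictions = np.asarray(myRes[0]).ravel()
overall_accuracy = np.mean(predictions == np.asarray(test_y))
print("Overall accuracy:", overall_accuracy)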
Example #3
def part6(root='./Dataset', trainfile='q2_train_set.txt'):
    def rotate(angle):
        ax.view_init(azim=angle)

    csv_path = os.path.join(root, trainfile)
    train_x, train_y = bs._load_dataset(csv_path)
    no_of_rows, _ = train_x.shape
    centroid = np.mean(train_x, axis=0)
    std = np.std(train_x, axis=0)
    Z = (train_x - centroid) / std
    Z_transpose = Z.T
    # Covariance matrix up to a constant factor (not divided by N - 1)
    cov_mat_wk = np.matmul(Z_transpose, Z)
    eig_values, eig_col_vectors = np.linalg.eig(cov_mat_wk)
    idx = eig_values.argsort()[::-1]
    eig_values_sorted = eig_values[idx]
    eig_col_vectors_sorted = eig_col_vectors[:, idx]
    Z_centered = np.matmul(Z, eig_col_vectors_sorted)
    PC1 = Z_centered[:, 0]
    PC2 = Z_centered[:, 1]
    PC3 = Z_centered[:, 2]
    PVE = np.sum(eig_values_sorted[0:3]) / np.sum(eig_values)
    print("PVE: ", PVE)
    #plot
    plt.close('all')
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) is no longer supported in recent Matplotlib
    ax.scatter(PC1, PC2, PC3, c=PC1, linewidth=0.1)
    ax.set_xlabel('PC 1')
    ax.set_ylabel('PC 2')
    ax.set_zlabel('PC 3')
    ax.view_init(azim=50)
    # Keep a reference to the animation, otherwise it may be garbage-collected and never run
    rot_animation = animation.FuncAnimation(fig,
                                            rotate,
                                            frames=np.arange(0, 365, 1),
                                            interval=0.1)  # delay between frames, in milliseconds
    print("Please close figures to continue...")
    plt.show()
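The eigendecomposition above works because `Z.T @ Z` is symmetric; an equivalent and usually more numerically stable route to the same principal components is an SVD of the standardized matrix `Z`. A minimal sketch, assuming `Z` has been built as in `part6`:

# SVD route: Z = U @ diag(S) @ Vt; the rows of Vt are the principal directions
U, S, Vt = np.linalg.svd(Z, full_matrices=False)
scores = Z @ Vt.T                                # matches Z_centered up to the sign of each column
pve_top3 = np.sum(S[:3] ** 2) / np.sum(S ** 2)   # S**2 are the eigenvalues of Z.T @ Z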
Example #4
def part5(root='./Dataset',
          trainfile='q2_train_set.txt',
          testfile='q2_test_set.txt'):
    noOfAcids = 20
    kMers = 8
    csv_path = os.path.join(root, trainfile)
    train_x, train_y = bs._load_dataset(csv_path)
    csv_path = os.path.join(root, testfile)
    test_x, test_y = bs._load_dataset(csv_path)
    ## Mutual information between each binary feature and the class label ##
    N = len(train_x)
    # For cleavable samples, count how many have each feature equal to 1 (N11) and to 0 (N01)
    cleavable = [
        row for index, row in enumerate(train_x) if train_y[index] == 1
    ]
    N11 = np.array(cleavable).sum(axis=0)
    N01 = len(cleavable) - N11
    # Same counts for the non-cleavable samples (N10, N00)
    notCleavable = [
        row for index, row in enumerate(train_x) if train_y[index] == 0
    ]
    N10 = np.array(notCleavable).sum(axis=0)
    N00 = len(notCleavable) - N10
    N1dot = N10 + N11
    N0dot = N00 + N01
    Ndot1 = len(cleavable)
    Ndot0 = len(notCleavable)
    ##
    sum_term1 = N11 * (np.log2((N * N11) / (N1dot * Ndot1)))
    sum_term2 = N01 * (np.log2((N * N01) / (N0dot * Ndot1)))
    sum_term3 = N10 * (np.log2((N * N10) / (N1dot * Ndot0)))
    sum_term4 = N00 * (np.log2((N * N00) / (N0dot * Ndot0)))
    sum_const = 1 / N
    I_UC = np.multiply(sum_const,
                       (sum_term1 + sum_term2 + sum_term3 + sum_term4))
    ##
    # NaN terms (from zero counts in the logs) are mapped to infinity before ranking; np.Inf was removed in NumPy 2.0
    I_UC[np.where(np.isnan(I_UC))] = np.inf
    I_UC_sort_indices = np.argsort(I_UC)[::-1]
    I_UC_sorted = I_UC[I_UC_sort_indices]
    ##
    # Indices of test samples whose true label is cleavable / non-cleavable
    trueIndices = np.where(np.array(test_y) == 1)
    falseIndices = np.where(np.array(test_y) == 0)
    learningParams = np.empty(shape=(train_x.shape[0], 0))
    testParams = np.empty(shape=(test_x.shape[0], 0))
    accuracies = []
    # Add features one at a time, in decreasing mutual-information order, and re-evaluate the classifier
    for i in range(1, noOfAcids * kMers):
        learningParams = np.hstack(
            (learningParams, train_x[:, I_UC_sort_indices[i - 1:i]]))
        testParams = np.hstack(
            (testParams, test_x[:, I_UC_sort_indices[i - 1:i]]))
        res, _, _ = bs._bayes(learningParams, train_y, testParams, kMers,
                              noOfAcids)
        accuracies.append(
            ((np.sum(res[0, trueIndices]) +
              (np.size(falseIndices) - np.sum(res[0, falseIndices]))) /
             len(test_x)))

    print("Max accuracy:\n",
          np.array(accuracies)[np.where(accuracies == np.max(accuracies))[0]])
    print("k = ", np.where(accuracies == np.max(accuracies))[0])
    #plot
    plt.close('all')
    plt.plot(range(1, noOfAcids * kMers), accuracies, '-k', linewidth=1)
    plt.ylabel('Accuracy')
    plt.xlabel('k')
    plt.grid(True)
    plt.title("k vs. Accuracy")
    print("Please close figures to continue...")
    plt.show()
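For reference, the four `sum_term` expressions in `part5` implement the mutual information I(U;C) between a binary feature U and the binary class C, estimated from the counts N11, N01, N10, N00 (feature value crossed with class label). A per-feature sketch of the same quantity, assuming `feature` and `labels` are 0/1 arrays of equal length; unlike the vectorized code above, zero counts are treated here as contributing 0 rather than NaN:

def mutual_information(feature, labels):
    # feature, labels: 1-D 0/1 arrays of the same length (hypothetical helper)
    N = len(labels)
    mi = 0.0
    for u in (0, 1):          # feature value
        for c in (0, 1):      # class label
            n_uc = np.sum((feature == u) & (labels == c))
            n_u = np.sum(feature == u)
            n_c = np.sum(labels == c)
            if n_uc > 0:      # 0 * log 0 is taken as 0
                mi += (n_uc / N) * np.log2(N * n_uc / (n_u * n_c))
    return mi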