Example #1
def main():
    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 5
    LR = 0.15  # 0.15 good for ~5000
    KERN = 'lin'  # lin or rbf or poly
    NUM_RUNS = 1  # number of test runs
    SUBSET_VAL = 300  # size of the subsample drawn from the self-reported dataset
    LIN_C = 0.90  # hyperparameter for linear kernel

    run = 0

    print("starting dictionary creation.............................")
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, SUBSET_VAL, run)
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    n_train = dictionary.get_n_train_instances()
    n_test = dictionary.get_n_manual_instances()

    X_train = dictionary.get_trainset()
    X_test = dictionary.get_manual_testset()

    print()
    print("starting optimization")
    #coef = kernel_mean_matching(X_train, X_test, n_train, n_test, kern='lin', B=10)
    coef = kernel_mean_matching(X_test, X_train[0], LIN_C, kern='lin', B=10)
    print(coef)
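
# kernel_mean_matching is not defined in this snippet; below is a minimal
# sketch of such a helper, following the KMM quadratic program of
# Huang et al. (2007) and assuming the cvxopt solver and dense inputs.
# The call above also passes LIN_C, which is specific to this repository,
# so treat the signature and names here as illustrative assumptions.
import numpy as np
from cvxopt import matrix, solvers

def kernel_mean_matching_sketch(X_test, X_train, kern='lin', B=10, eps=None):
    n_tr, n_te = len(X_train), len(X_test)
    if eps is None:
        eps = B / np.sqrt(n_tr)
    if kern == 'lin':
        K = np.dot(X_train, X_train.T)  # train/train Gram matrix
        kappa = np.sum(np.dot(X_train, X_test.T), axis=1) * float(n_tr) / float(n_te)
    else:
        raise ValueError('only the linear kernel is sketched here')
    # minimize (1/2) b'Kb - kappa'b  s.t.  0 <= b <= B and |mean(b) - 1| <= eps
    G = np.r_[np.ones((1, n_tr)), -np.ones((1, n_tr)), np.eye(n_tr), -np.eye(n_tr)]
    h = np.r_[n_tr * (1 + eps), n_tr * (eps - 1), B * np.ones(n_tr), np.zeros(n_tr)]
    sol = solvers.qp(matrix(K), matrix(-kappa), matrix(G), matrix(h))
    return np.array(sol['x']).flatten()
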
def create_dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL, LIN_C, run, model_version):
    
    print("starting dictionary creation") 

    # dictionary must be recreated each run to get different subsample each time
    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL, LIN_C, run, model=model_version)
    end = time.time()
    print("dictionary took ", (end - start)/60.0, " time to create.")
    
    return dictionary
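
# The Dictionary class is defined elsewhere in this repository. For context,
# a hypothetical stub of the interface these examples rely on; method names
# are taken from the calls in this file, bodies are intentionally omitted:
class DictionaryInterface:
    def get_train_and_test(self): ...   # X_train, X_test, y_train, y_test
    def get_trainset(self): ...
    def get_manual_testset(self): ...
    def get_manual_set_labels(self): ...
    def get_nwords(self): ...
    def get_nclasses(self): ...
    def get_n_train_instances(self): ...
    def get_n_test_instances(self): ...
    def get_n_manual_instances(self): ...
    def get_nlabels_eachclass_train(self): ...
    def get_nlabels_eachclass_test(self): ...
    def get_nlabels_eachclass_manual(self): ...
    def get_optbeta(self): ...          # KMM reweighting coefficients
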
def main():

    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 20
    LR = 0.10  # 0.15 good for ~5000
    KERN = 'lin'  # lin or rbf or poly
    NUM_RUNS = 1  # number of test runs
    SUBSET_VAL = 1000  # size of the subsample drawn from the self-reported dataset
    LIN_C = 0.90  # hyperparameter for linear kernel

    BATCHSIZE = 1  # number of instances in each batch

    ##### instantiations #######################################

    print("starting dictionary creation")

    # dictionary must be recreated each run to get different subsample each time
    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS,
                            MINCOUNT,
                            BUCKET,
                            KERN,
                            SUBSET_VAL,
                            LIN_C,
                            model='original')
    end = time.time()
    print("dictionary took ", (end - start) / 60.0, " time to create.")
    nwords = dictionary.get_nwords()
    nclasses = dictionary.get_nclasses()

    #initialize testing
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    N_train = dictionary.get_n_train_instances()
    N_test = dictionary.get_n_test_instances()

    print("Number of Train instances: ", N_train,
          " Number of Test instances: ", N_test)
    ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
    ntest_eachclass = dictionary.get_nlabels_eachclass_test()
    print("N each class TRAIN: ", ntrain_eachclass, " N each class TEST: ",
          ntest_eachclass)

    # manual labeled set (Kaggle dataset)
    X_manual = dictionary.get_manual_testset()
    y_manual = dictionary.get_manual_set_labels()
    N_manual = dictionary.get_n_manual_instances()
    print()
    print("Number of Manual testing instances: ", N_manual, " shape: ",
          X_manual.shape)
    nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
    print("N each class Manual testing instances: ", nmanual_eachclass)
    print("#####################################")

    p = X_train.shape[1]

    # A
    #A_n = nwords + BUCKET   # cols
    A_n = p
    A_m = DIM  # rows
    uniform_val = 1.0 / DIM
    np.random.seed(0)
    A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

    # B
    B_n = DIM  # cols
    B_m = nclasses  # rows
    B = np.zeros((B_m, B_n))

    #### train ################################################

    print("A: ", A.shape)
    print("B: ", B.shape)
    print("X_trian: ", X_train.shape)
    print("labels: ", y_train.shape)

    losses_train = []
    losses_test = []
    losses_manual = []

    print()
    print()

    X_train = normalize(X_train, axis=1, norm='l1')
    X_test = normalize(X_test, axis=1, norm='l1')
    X_manual = normalize(X_manual, axis=1, norm='l1')

    #X_train_batches = np.vsplit(X_train, BATCHES )
    #X_train = np.toarray(X_train)
    #X_train_batches = np.array_split(X_train.todense(), BATCHES)
    #print("****** ", X_train_batches[0].shape)

    #y_train_batches = np.array_split(y_train, BATCHES)

    traintime_start = time.time()
    for i in range(EPOCH):
        print()
        print("EPOCH: ", i)

        alpha = LR * (1 - i / EPOCH)  # linearly decaying lr alpha
        train_loss = 0

        start = 0
        batchnum = 0
        while start < N_train:
            # the final slice may be shorter than BATCHSIZE; slicing past the
            # end of a CSR matrix simply returns the remaining rows
            batch = X_train.tocsr()[start:start + BATCHSIZE, :]
            y_train_batch = y_train[start:start + BATCHSIZE, :]

            # Forward Propagation
            hidden = sparse.csr_matrix.dot(A, batch.T)
            hidden = normalize(hidden, axis=0, norm='l1')  # columns are instances
            z2 = np.dot(B, hidden)

            # softmax
            Y_hat = softmax(z2, theta=1.0, axis=0)

            #### Back prop #########################################################
            # B update
            gradient = alpha * np.dot(
                np.subtract(Y_hat.T, y_train_batch).T, hidden.T)
            B = np.subtract(B, gradient)

            # A update
            first = np.dot(np.subtract(Y_hat.T, y_train_batch), B)
            gradient = alpha * sparse.csr_matrix.dot(first.T, batch)
            A = np.subtract(A, gradient)

            batchnum += 1
            start = start + BATCHSIZE

        # TRAINING LOSS
        #train_loss = np.sum(train_loss)/N_train
        hidden_train = sparse.csr_matrix.dot(A, X_train.T)
        hidden_train = normalize(hidden_train, axis=0, norm='l1')  # columns are instances
        z2_train = np.dot(B, hidden_train)

        Y_hat_train = softmax(z2_train, theta=1.0, axis=0)
        loglike_train = np.log(Y_hat_train)
        train_loss = -np.multiply(
            y_train, loglike_train.T)  # need to multiply element wise here
        train_loss = np.sum(train_loss) / N_train
        print("Train:   ", train_loss)

        ## TESTING LOSS
        hidden_test = sparse.csr_matrix.dot(A, X_test.T)
        hidden_test = normalize(hidden_test, axis=0, norm='l1')  # columns are instances
        z2_test = np.dot(B, hidden_test)

        Y_hat_test = softmax(z2_test, theta=1.0, axis=0)
        loglike_test = np.log(Y_hat_test)
        test_loss = -np.multiply(
            y_test, loglike_test.T)  # need to multiply element wise here
        test_loss = np.sum(test_loss) / N_test
        print("Test:    ", test_loss)

        ## MANUAL SET TESTING LOSS
        hidden_man = sparse.csr_matrix.dot(A, X_manual.T)
        hidden_man = normalize(hidden_man, axis=0, norm='l1')  # columns are instances
        z2_man = np.dot(B, hidden_man)

        Y_hat_man = softmax(z2_man, theta=1.0, axis=0)
        loglike_manual = np.log(Y_hat_man)
        manual_loss = -np.multiply(
            y_manual, loglike_manual.T)  # need to multiply element wise here
        manual_loss = np.sum(manual_loss) / N_manual

        print("Manual Set:    ", manual_loss)

        #### Back prop #########################################################
        # B update
        #gradient = alpha * np.dot(np.subtract(Y_hat.T, y_train).T, hidden.T)
        #B = np.subtract(B, gradient)

        # A update
        #first = np.dot(np.subtract(Y_hat.T, y_train), B)
        #gradient = alpha * sparse.csr_matrix.dot(first.T, X_train)
        #A = np.subtract(A, gradient)

        #train_class_error, train_precision, train_recall, train_F1, train_AUC, train_FPR, train_TPR = metrics(Y_hat, y_train)
        #test_class_error, test_precision, test_recall, test_F1, test_AUC, test_FPR, test_TPR = metrics(X_test, y_test, A, B, N_test)
        #manual_class_error, manual_precision, manual_recall, manual_F1, manual_AUC, manual_FPR, manual_TPR = metrics(X_manual, y_manual, A, B, N_manual)

        #print()
        #print("TRAIN:")
        #print("         Classification Err: ", train_class_error)
        #print("         Precision:          ", train_precision)
        #print("         Recall:             ", train_recall)
        #print("         F1:                 ", train_F1)

        #print("TEST:")
        #print("         Classification Err: ", test_class_error)
        #print("         Precision:          ", test_precision)
        #print("         Recall:             ", test_recall)
        #print("         F1:                 ", test_F1)

        #print()
        #print("MANUAL:")
        #print("         Classification Err: ", manual_class_error)
        #print("         Precision:          ", manual_precision)
        #print("         Recall:             ", manual_recall)
        #print("         F1:                 ", manual_F1)

        losses_train.append(train_loss)
        losses_test.append(test_loss)
        losses_manual.append(manual_loss)

    traintime_end = time.time()

    print("model took ", (traintime_end - traintime_start) / 60.0,
          " time to train")

    epochs = list(range(EPOCH))

    plt.plot(epochs, losses_train, 'm', label="train")
    plt.plot(epochs, losses_test, 'c', label="test")
    plt.plot(epochs, losses_manual, 'g', label="manual")
    title = "Main_temp: n_train: ", N_train, " n_test: ", N_test, " n_manual ", N_manual
    plt.title(title)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(loc='upper left')
    plt.show()
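
# The softmax helper called as softmax(z2, theta=1.0, axis=0) above is not
# shown in this snippet. A minimal axis-aware sketch consistent with that
# call; treating theta as a temperature-style scale is an assumption:
import numpy as np

def softmax_sketch(X, theta=1.0, axis=0):
    y = np.asarray(X, dtype=float) * theta
    y = y - np.max(y, axis=axis, keepdims=True)  # shift for numerical stability
    y = np.exp(y)
    return y / np.sum(y, axis=axis, keepdims=True)
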
Example #4
def main():

    # args from Simple Queries paper
    # DON'T adjust these
    DIM = 30
    WORDGRAMS = 3
    MINCOUNT = 2
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 20
    LR = 0.20  # 0.15 good for ~5000
    KERN = 'lin'  # lin or rbf or poly
    NUM_RUNS = 5  # number of test runs
    SUBSET_VAL = 1000  # size of the subsample drawn from the self-reported dataset
    LIN_C = 0.90  # hyperparameter for linear kernel

    model = 'kmm'

    file_names = [
        'KMMoutput/loss_train.txt', 'KMMoutput/loss_test.txt',
        'KMMoutput/loss_manual.txt', 'KMMoutput/error_train.txt',
        'KMMoutput/error_test.txt', 'KMMoutput/error_manual.txt',
        'KMMoutput/precision_train.txt', 'KMMoutput/precision_test.txt',
        'KMMoutput/precision_manual.txt', 'KMMoutput/recall_train.txt',
        'KMMoutput/recall_test.txt', 'KMMoutput/recall_manual.txt',
        'KMMoutput/F1_train.txt', 'KMMoutput/F1_test.txt',
        'KMMoutput/F1_manual.txt', 'KMMoutput/AUC_train.txt',
        'KMMoutput/AUC_test.txt', 'KMMoutput/AUC_manual.txt'
    ]

    create_readme(DIM, WORDGRAMS, MINCOUNT, MINN, MAXN, BUCKET, EPOCH, LR,
                  KERN, NUM_RUNS, SUBSET_VAL, LIN_C)

    ##### instantiations #######################################

    for run in range(NUM_RUNS):
        print(
            "*******************************************************RUN NUMBER: ",
            run)
        print()

        print("starting dictionary creation")

        # dictionary must be recreated each run to get different subsample each time
        # initialize training
        start = time.time()
        dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL,
                                LIN_C, model)
        end = time.time()
        print("Dictionary took ", (end - start) / 60.0, " minutes to create.")
        nwords = dictionary.get_nwords()
        nclasses = dictionary.get_nclasses()

        #initialize testing
        X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
        print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
        N_train = dictionary.get_n_train_instances()
        N_test = dictionary.get_n_test_instances()

        print("Number of Train instances: ", N_train,
              " Number of Test instances: ", N_test)
        ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
        ntest_eachclass = dictionary.get_nlabels_eachclass_test()
        print("N each class TRAIN: ", ntrain_eachclass, " N each class TEST: ",
              ntest_eachclass)

        # manual labeled set (Kaggle dataset)
        X_manual = dictionary.get_manual_testset()
        y_manual = dictionary.get_manual_set_labels()
        N_manual = dictionary.get_n_manual_instances()
        print()
        print("Number of Manual testing instances: ", N_manual, " shape: ",
              X_manual.shape)
        nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
        print("N each class Manual testing instances: ", nmanual_eachclass)
        print("#####################################")

        p = X_train.shape[1]

        # A
        #A_n = nwords + BUCKET   # cols
        A_n = p
        A_m = DIM  # rows
        uniform_val = 1.0 / DIM
        np.random.seed(0)
        A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

        # B
        B_n = DIM  # cols
        B_m = nclasses  # rows
        B = np.zeros((B_m, B_n))

        beta = dictionary.get_optbeta()  # optimal KMM reweighting coefficients

        # NOTE: run with ones to check the implementation; results should be
        # close to the original (without the reweighting coefficients)
        #beta = np.ones((N_train))

        #### train ################################################

        print()
        print()

        for i in range(EPOCH):
            print()
            print("EPOCH: ", i)

            # linearly decaying lr alpha
            alpha = LR * (1 - i / EPOCH)

            l = 0
            train_loss = 0

            # TRAINING
            for x in X_train:
                beta_n = beta[l]

                label = y_train[l]
                B_old = B
                A_old = A

                # Forward Propagation
                hidden = sparse.csr_matrix.dot(A_old, x.T)

                if np.sum(x) > 0:
                    a1 = hidden / np.sum(x)
                else:
                    a1 = hidden

                z2 = np.dot(B, a1)
                exps = np.exp(z2 - np.max(z2))
                Y_hat = exps / np.sum(exps)

                # Back prop with alt optimization
                B = gradient_B(B_old, A_old, x, label, nclasses, alpha, DIM,
                               a1, Y_hat, beta_n)
                A = gradient_A(B_old, A_old, x, label, nclasses, alpha, DIM,
                               Y_hat, beta_n)

                # verify gradients
                #check_B_gradient(B_old, A_old, label, x, Y_hat, a1)
                #check_A_gradient(B_old, A_old, label, x, Y_hat)

                loglike = np.log(Y_hat)
                train_loss += -np.dot(label, loglike)

                l += 1

            # TRAINING LOSS
            #train_loss = total_loss_function(X_train, y_train, A, B, N_train)
            train_loss = (1.0 / N_train) * train_loss
            print("Train:   ", train_loss)

            # TESTING LOSS
            test_loss = total_loss_function(X_test, y_test, A_old, B_old,
                                            N_test)
            print("Test:    ", test_loss)

            print("Difference = ", test_loss - train_loss)

            # MANUAL SET TESTING LOSS
            manual_loss = total_loss_function(X_manual, y_manual, A_old, B_old,
                                              N_manual)
            print("Manual Set:    ", manual_loss)

            train_class_error, train_precision, train_recall, train_F1, train_AUC, train_FPR, train_TPR = metrics(
                X_train, y_train, A, B, N_train)
            test_class_error, test_precision, test_recall, test_F1, test_AUC, test_FPR, test_TPR = metrics(
                X_test, y_test, A, B, N_test)
            manual_class_error, manual_precision, manual_recall, manual_F1, manual_AUC, manual_FPR, manual_TPR = metrics(
                X_manual, y_manual, A, B, N_manual)

            print()
            print("TRAIN:")
            print("         Classification Err: ", train_class_error)
            print("         Precision:          ", train_precision)
            print("         Recall:             ", train_recall)
            print("         F1:                 ", train_F1)

            print("TEST:")
            print("         Classification Err: ", test_class_error)
            print("         Precision:          ", test_precision)
            print("         Recall:             ", test_recall)
            print("         F1:                 ", test_F1)

            print()
            print("MANUAL:")
            print("         Classification Err: ", manual_class_error)
            print("         Precision:          ", manual_precision)
            print("         Recall:             ", manual_recall)
            print("         F1:                 ", manual_F1)

            #### WRITING LOSSES / ERROR / PRECISION / RECALL / F1 / AUC
            epoch_results = {
                'KMMoutput/loss_train.txt': train_loss,
                'KMMoutput/loss_test.txt': test_loss,
                'KMMoutput/loss_manual.txt': manual_loss,
                'KMMoutput/error_train.txt': train_class_error,
                'KMMoutput/error_test.txt': test_class_error,
                'KMMoutput/error_manual.txt': manual_class_error,
                'KMMoutput/precision_train.txt': train_precision,
                'KMMoutput/precision_test.txt': test_precision,
                'KMMoutput/precision_manual.txt': manual_precision,
                'KMMoutput/recall_train.txt': train_recall,
                'KMMoutput/recall_test.txt': test_recall,
                'KMMoutput/recall_manual.txt': manual_recall,
                'KMMoutput/F1_train.txt': train_F1,
                'KMMoutput/F1_test.txt': test_F1,
                'KMMoutput/F1_manual.txt': manual_F1,
                'KMMoutput/AUC_train.txt': train_AUC,
                'KMMoutput/AUC_test.txt': test_AUC,
                'KMMoutput/AUC_manual.txt': manual_AUC,
            }
            for name, value in epoch_results.items():
                with open(name, 'a') as f:
                    f.write("%s," % value)

        # write a newline to each output file after each trial
        for name in file_names:
            with open(name, 'a') as f:
                f.write('\n')
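
# total_loss_function and metrics are imported from elsewhere in the
# repository. Minimal sketches under two assumptions: labels are one-hot
# rows, and the task is binary (so AUC/FPR/TPR are well defined):
import numpy as np
from scipy import sparse
from sklearn.metrics import (precision_score, recall_score, f1_score,
                             roc_auc_score, roc_curve)

def _forward_sketch(X, A, B):
    hidden = np.asarray(sparse.csr_matrix.dot(A, X.T))   # (DIM, N)
    sums = np.asarray(X.sum(axis=1)).flatten()
    sums[sums == 0] = 1.0
    a1 = hidden / sums[None, :]                          # per-instance averaging
    z2 = np.dot(B, a1)
    exps = np.exp(z2 - np.max(z2, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)    # (nclasses, N)

def total_loss_function_sketch(X, y, A, B, N):
    Y_hat = _forward_sketch(X, A, B)
    return -np.sum(np.multiply(y, np.log(Y_hat).T)) / N  # mean cross-entropy

def metrics_sketch(X, y, A, B, N):
    Y_hat = _forward_sketch(X, A, B)
    y_pred = np.argmax(Y_hat, axis=0)
    y_true = np.argmax(y, axis=1)
    class_error = np.mean(y_pred != y_true)
    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    F1 = f1_score(y_true, y_pred)
    AUC = roc_auc_score(y_true, Y_hat[1, :])             # class 1 as positive
    FPR, TPR, _ = roc_curve(y_true, Y_hat[1, :])
    return class_error, prec, rec, F1, AUC, FPR, TPR
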
Example #5
    return n_tr / n_te * np.sum(_sum)


def get_optbeta():
    return beta


###################################################################

WORDGRAMS = 3
MINCOUNT = 2
BUCKET = 1000000

print("starting dictionary creation.............................")
dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET)
X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

n_train = dictionary.get_n_train_instances()
n_test = dictionary.get_n_manual_instances()

X_train = dictionary.get_trainset()
X_test = dictionary.get_manual_testset()

B = n_train
#sigma = np.std(X_train)  # alternative: estimate the RBF bandwidth from the data
sigma = 0.25

b = (0.0, B)
bounds = (b, b, b, b, b)
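
# The snippet stops after building the box constraints. A hypothetical
# continuation showing how bounds like these are typically consumed by
# scipy.optimize.minimize; the objective J below is only a placeholder for
# the KMM objective this file presumably defines elsewhere:
import numpy as np
from scipy.optimize import minimize

def J(beta):
    # placeholder quadratic; the real objective would be
    # 0.5 * beta' K beta - kappa' beta from the KMM derivation
    return 0.5 * np.sum(beta ** 2) - np.sum(beta)

x0 = np.ones(len(bounds))  # start from uniform weights
res = minimize(J, x0, method='L-BFGS-B', bounds=bounds)
beta = res.x
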
Example #6
def main():

    # args from Simple Queries paper
    DIM = 30
    LR = 0.20  # 0.15 good for ~5000
    WORDGRAMS = 3
    MINCOUNT = 2
    MINN = 3
    MAXN = 3
    BUCKET = 1000000
    EPOCH = 20

    KERN = 'lin'  # lin or rbf or poly
    NUM_RUNS = 5  # number of test runs
    SUBSET_VAL = 1000  # size of the subsample drawn from the self-reported dataset
    LIN_C = 0.90  # hyperparameter for linear kernel

    print("starting dictionary creation")

    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL,
                            LIN_C)
    end = time.time()
    print("Dictionary took ", (end - start) / 60.0, " minutes to create.")
    nwords = dictionary.get_nwords()
    nclasses = dictionary.get_nclasses()

    #initialize testing
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    N_train = dictionary.get_n_train_instances()
    N_test = dictionary.get_n_test_instances()

    print("Number of Train instances: ", N_train,
          " Number of Test instances: ", N_test)
    ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
    ntest_eachclass = dictionary.get_nlabels_eachclass_test()
    print("N each class TRAIN: ", ntrain_eachclass, " N each class TEST: ",
          ntest_eachclass)

    # manual labeled set (Kaggle dataset)
    X_manual = dictionary.get_manual_testset()
    y_manual = dictionary.get_manual_set_labels()
    N_manual = dictionary.get_n_manual_instances()
    print()
    print("Number of Manual testing instances: ", N_manual, " shape: ",
          X_manual.shape)
    nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
    print("N each class Manual testing instances: ", nmanual_eachclass)
    print("################################################################")

    beta = dictionary.get_optbeta()  # optimal KMM reweighting coefficients

    # NOTE: run with ones to check the implementation; results should be
    # close to the original (without the reweighting coefficients)
    #beta = np.ones((N_train))

    ##### instantiations #######################################

    p = X_train.shape[1]

    # A
    #A_n = nwords + BUCKET   # cols
    A_n = p
    A_m = DIM  # rows
    uniform_val = 1.0 / DIM
    np.random.seed(0)
    A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

    # B
    B_n = DIM  # cols
    B_m = nclasses  # rows
    B = np.zeros((B_m, B_n))

    #### train ################################################

    losses_train = []
    losses_test = []
    losses_manual = []

    class_error_train = []
    class_error_test = []
    class_error_manual = []

    prec_train = []
    prec_test = []
    prec_manual = []

    recall_train = []
    recall_test = []
    recall_manual = []

    F1_train = []
    F1_test = []
    F1_manual = []

    AUC_train = []
    AUC_test = []
    AUC_manual = []

    print()
    print()

    for i in range(EPOCH):
        print()
        print("EPOCH: ", i)

        # linearly decaying lr alpha
        alpha = LR * (1 - i / EPOCH)

        l = 0
        train_loss = 0

        # TRAINING
        for x in X_train:
            beta_n = beta[l]

            label = y_train[l]
            B_old = B
            A_old = A

            # Forward Propagation
            hidden = sparse.csr_matrix.dot(A_old, x.T)

            if np.sum(x) > 0:
                a1 = hidden / np.sum(x)
            else:
                a1 = hidden

            z2 = np.dot(B, a1)
            exps = np.exp(z2 - np.max(z2))
            Y_hat = exps / np.sum(exps)

            # Back prop with alt optimization
            B = gradient_B(B_old, A_old, x, label, nclasses, alpha, DIM, a1,
                           Y_hat, beta_n)
            A = gradient_A(B_old, A_old, x, label, nclasses, alpha, DIM, Y_hat,
                           beta_n)

            # verify gradients
            #check_B_gradient(B_old, A_old, label, x, Y_hat, a1)
            #check_A_gradient(B_old, A_old, label, x, Y_hat)

            loglike = np.log(Y_hat)
            #train_loss += -beta_n * np.dot(label, loglike)
            train_loss += -np.dot(label, loglike)

            l += 1

        # TRAINING LOSS
        #train_loss = total_loss_function(X_train, y_train, A, B, N_train)
        train_loss = (1.0 / N_train) * train_loss
        print("Train:   ", train_loss)

        # TESTING LOSS
        test_loss = total_loss_function(X_test, y_test, A_old, B_old, N_test,
                                        beta)
        print("Test:    ", test_loss)

        print("Difference = ", test_loss - train_loss)

        # MANUAL SET TESTING LOSS
        manual_loss = total_loss_function(X_manual, y_manual, A_old, B_old,
                                          N_manual, beta)
        print("Manual Set:    ", manual_loss)

        train_class_error, train_precision, train_recall, train_F1, train_AUC, train_FPR, train_TPR = metrics(
            X_train, y_train, A, B, N_train)
        test_class_error, test_precision, test_recall, test_F1, test_AUC, test_FPR, test_TPR = metrics(
            X_test, y_test, A, B, N_test)
        manual_class_error, manual_precision, manual_recall, manual_F1, manual_AUC, manual_FPR, manual_TPR = metrics(
            X_manual, y_manual, A, B, N_manual)

        print()
        print("TRAIN:")
        print("         Classification Err: ", train_class_error)
        print("         Precision:          ", train_precision)
        print("         Recall:             ", train_recall)
        print("         F1:                 ", train_F1)

        print("TEST:")
        print("         Classification Err: ", test_class_error)
        print("         Precision:          ", test_precision)
        print("         Recall:             ", test_recall)
        print("         F1:                 ", test_F1)

        print()
        print("MANUAL:")
        print("         Classification Err: ", manual_class_error)
        print("         Precision:          ", manual_precision)
        print("         Recall:             ", manual_recall)
        print("         F1:                 ", manual_F1)

        losses_train.append(train_loss)
        losses_test.append(test_loss)
        losses_manual.append(manual_loss)

        class_error_train.append(train_class_error)
        class_error_test.append(test_class_error)
        class_error_manual.append(manual_class_error)

        prec_train.append(train_precision)
        prec_test.append(test_precision)
        prec_manual.append(manual_precision)

        recall_train.append(train_recall)
        recall_test.append(test_recall)
        recall_manual.append(manual_recall)

        F1_train.append(train_F1)
        F1_test.append(test_F1)
        F1_manual.append(manual_F1)

        AUC_train.append(train_AUC)
        AUC_test.append(test_AUC)
        AUC_manual.append(manual_AUC)

    epochs = list(range(EPOCH))

    txt = "LR: ", LR, " Kern: ", KERN,

    plt.plot(epochs, losses_train, 'm', label="train")
    plt.plot(epochs, losses_test, 'c', label="test")
    plt.plot(epochs, losses_manual, 'g', label="manual")
    plt.ylabel('loss')
    plt.xlabel('epoch')
    title = "KMM LOSS, n_train: ", N_train, " n_test: ", N_test, " n_manual ", N_manual
    plt.title(title)
    plt.legend(loc='upper left')
    plt.text(.5, .05, txt, ha='center')
    plt.show()

    plt.plot(epochs,
             class_error_train,
             'm',
             label="train classification error")
    plt.plot(epochs, class_error_test, 'c', label="test classification error")
    plt.plot(epochs,
             class_error_manual,
             'g',
             label="manual classification error")
    plt.ylabel('classification error')
    plt.xlabel('epoch')
    title = ("KMM CLASS ERROR, n_train: " + str(N_train) + " n_test: " +
             str(N_test) + " n_manual: " + str(N_manual) + " kern: " + KERN)
    plt.title(title)
    plt.legend(loc='upper left')
    plt.text(.5, .05, txt, ha='center')
    plt.show()
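
# gradient_B and gradient_A are imported from elsewhere in the repository.
# Minimal sketches of beta-weighted softmax-regression updates matching the
# per-sample calls above, derived from the standard cross-entropy gradients;
# x is assumed to be a 1 x p sparse row and label a one-hot vector:
import numpy as np

def gradient_B_sketch(B_old, A_old, x, label, nclasses, alpha, DIM, a1,
                      Y_hat, beta_n):
    # dL/dB = beta_n * (Y_hat - y) a1^T for a single instance
    err = np.asarray(Y_hat).flatten() - label
    return B_old - alpha * beta_n * np.outer(err, np.asarray(a1).flatten())

def gradient_A_sketch(B_old, A_old, x, label, nclasses, alpha, DIM,
                      Y_hat, beta_n):
    # dL/dA = beta_n * B^T (Y_hat - y) (x / sum(x))^T
    x_dense = np.asarray(x.todense()).flatten()
    denom = x_dense.sum() if x_dense.sum() > 0 else 1.0
    err = np.dot(B_old.T, np.asarray(Y_hat).flatten() - label)  # (DIM,)
    return A_old - alpha * beta_n * np.outer(err, x_dense / denom)
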
def main():

    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 20
    LR = 0.15  # 0.15 good for ~5000
    KERN = 'lin'  # lin or rbf or poly
    NUM_RUNS = 1  # number of test runs
    SUBSET_VAL = 800  # size of the subsample drawn from the self-reported dataset
    LIN_C = 0.90  # hyperparameter for linear kernel

    BATCHSIZE = 2  # number of instances in each batch

    ##### instantiations #######################################

    print("starting dictionary creation")

    # dictionary must be recreated each run to get different subsample each time
    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS,
                            MINCOUNT,
                            BUCKET,
                            KERN,
                            SUBSET_VAL,
                            LIN_C,
                            model='original')
    end = time.time()
    print("dictionary took ", (end - start) / 60.0, " time to create.")

    nwords = dictionary.get_nwords()
    nclasses = dictionary.get_nclasses()

    #initialize testing
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    N_train = dictionary.get_n_train_instances()
    N_test = dictionary.get_n_test_instances()

    print("Number of Train instances: ", N_train,
          " Number of Test instances: ", N_test)
    ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
    ntest_eachclass = dictionary.get_nlabels_eachclass_test()
    print("N each class TRAIN: ", ntrain_eachclass, " N each class TEST: ",
          ntest_eachclass)

    # manual labeled set (Kaggle dataset)
    X_manual = dictionary.get_manual_testset()
    y_manual = dictionary.get_manual_set_labels()
    N_manual = dictionary.get_n_manual_instances()
    print()
    print("Number of Manual testing instances: ", N_manual, " shape: ",
          X_manual.shape)
    nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
    print("N each class Manual testing instances: ", nmanual_eachclass)
    print("#####################################")

    p = X_train.shape[1]

    # A
    #A_n = nwords + BUCKET   # cols
    A_n = p
    A_m = DIM  # rows
    uniform_val = 1.0 / DIM
    np.random.seed(0)
    A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

    # B
    B_n = DIM  # cols
    B_m = nclasses  # rows
    B = np.zeros((B_m, B_n))

    #### train ################################################

    losses_train = []
    losses_test = []
    losses_manual = []

    print()
    print()

    traintime_start = time.time()
    for i in range(EPOCH):
        print()
        print("EPOCH: ", i)

        # linearly decaying lr alpha
        alpha = LR * (1 - i / EPOCH)

        train_loss = 0

        start = 0
        batchnum = 0
        while start < N_train:
            # the final slice may be shorter than BATCHSIZE; slicing past the
            # end of a CSR matrix simply returns the remaining rows
            batch = X_train.tocsr()[start:start + BATCHSIZE, :]
            y_train_batch = y_train[start:start + BATCHSIZE, :]

            B_old = B
            A_old = A

            # Forward Propagation
            hidden = sparse.csr_matrix.dot(A, batch.T)
            sum_ = np.sum(batch, axis=1)
            sum_[sum_ == 0] = 1  # replace zeros with ones so the divide works
            sum_ = np.array(sum_).flatten()

            a1 = (hidden.T / sum_[:, None]).T
            z2 = np.dot(B, a1)
            Y_hat = stable_softmax(z2)

            # Back prop with alt optimization
            B = gradient_B(B_old, A_old, y_train_batch, alpha, a1, Y_hat)
            A = gradient_A(B_old, A_old, batch, y_train_batch, alpha, sum_,
                           Y_hat)

            batchnum += 1
            start = start + BATCHSIZE

        # TRAINING LOSS
        #train_loss = train_loss * (1.0/N_train)
        #print("Train:   ", train_loss)

        train_loss = get_total_loss(A, B, X_train, y_train, N_train)
        print("Train:   ", train_loss)

        ## TESTING LOSS
        test_loss = get_total_loss(A, B, X_test, y_test, N_test)
        print("Test:    ", test_loss)

        #print("Difference = ", test_loss - train_loss)

        ## MANUAL SET TESTING LOSS
        manual_loss = get_total_loss(A, B, X_manual, y_manual, N_manual)
        print("Manual Set:    ", manual_loss)

        #train_class_error, train_precision, train_recall, train_F1, train_AUC, train_FPR, train_TPR = metrics(X_train, y_train, A, B, N_train)
        train_class_error = metrics(X_train, y_train, A, B, N_train)

        #test_class_error, test_precision, test_recall, test_F1, test_AUC, test_FPR, test_TPR = metrics(X_test, y_test, A, B, N_test)
        test_class_error = metrics(X_test, y_test, A, B, N_test)

        #manual_class_error, manual_precision, manual_recall, manual_F1, manual_AUC, manual_FPR, manual_TPR = metrics(X_manual, y_manual, A, B, N_manual)
        manual_class_error = metrics(X_manual, y_manual, A, B, N_manual)

        print()
        print("TRAIN:")
        print("         Classification Err: ", train_class_error)
        #print("         Precision:          ", train_precision)
        #print("         Recall:             ", train_recall)
        #print("         F1:                 ", train_F1)

        print("TEST:")
        print("         Classification Err: ", test_class_error)
        #print("         Precision:          ", test_precision)
        #print("         Recall:             ", test_recall)
        #print("         F1:                 ", test_F1)

        print()
        print("MANUAL:")
        print("         Classification Err: ", manual_class_error)
        #print("         Precision:          ", manual_precision)
        #print("         Recall:             ", manual_recall)
        #print("         F1:                 ", manual_F1)

        losses_train.append(train_loss)
        losses_test.append(test_loss)
        losses_manual.append(manual_loss)

    traintime_end = time.time()

    print("model took ", (traintime_end - traintime_start) / 60.0,
          " time to train")

    epochs = list(range(EPOCH))

    plt.plot(epochs, losses_train, 'm', label="train")
    plt.plot(epochs, losses_test, 'c', label="test")
    plt.plot(epochs, losses_manual, 'g', label="manual")
    title = "Main_temp: n_train: ", N_train, " n_test: ", N_test, " n_manual ", N_manual
    #title = "Main_temp: n_train: ", N_train, " n_test: ", N_test
    plt.title(title)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(loc='upper left')
    plt.show()
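
# The helpers assumed by this batched variant (stable_softmax, get_total_loss,
# and the batched gradient_B / gradient_A) are defined elsewhere in the
# repository. Minimal sketches consistent with the shapes used above:
import numpy as np
from scipy import sparse

def stable_softmax_sketch(z):
    # z: (nclasses, batch); softmax over classes, column by column
    exps = np.exp(z - np.max(z, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)

def get_total_loss_sketch(A, B, X, y, N):
    # mean cross-entropy over all N instances, mirroring the forward pass above
    hidden = np.asarray(sparse.csr_matrix.dot(A, X.T))   # (DIM, N)
    sums = np.asarray(X.sum(axis=1)).flatten()
    sums[sums == 0] = 1.0
    Y_hat = stable_softmax_sketch(np.dot(B, hidden / sums[None, :]))
    return -np.sum(np.multiply(y, np.log(Y_hat).T)) / N

def gradient_B_sketch(B_old, A_old, y_batch, alpha, a1, Y_hat):
    # dL/dB summed over the batch: (Y_hat - y^T) a1^T
    return B_old - alpha * np.dot(Y_hat - y_batch.T, a1.T)

def gradient_A_sketch(B_old, A_old, batch, y_batch, alpha, sum_, Y_hat):
    # dL/dA: class error pushed back through B, times the normalized inputs;
    # the batch is densified here purely to keep the sketch simple
    first = np.dot((Y_hat - y_batch.T).T, B_old)         # (batch, DIM)
    batch_norm = np.asarray(batch.todense()) / sum_[:, None]
    return A_old - alpha * np.dot(first.T, batch_norm)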