Example #1
def sample_true_x_y(batch_size, X_train, y_train):
    # Sample a random training batch (indices drawn with replacement).
    rand_batch_indices = np.random.randint(0, X_train.shape[0], batch_size)
    x_batch_train = X_train[rand_batch_indices]
    y_batch_train = y_train[rand_batch_indices]
    # `utils` is a project-local helper module; convert_to_array turns
    # the batches into plain arrays for the training loop.
    unrolled_x = utils.convert_to_array(x_batch_train)
    unrolled_y = utils.convert_to_array(y_batch_train)
    return unrolled_x, unrolled_y
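A minimal usage sketch follows; the data is synthetic and the `utils` stand-in is hypothetical (the real project module is not shown here, so this only assumes convert_to_array behaves like np.asarray):

import numpy as np

class utils:  # hypothetical stand-in for the project's utils module
    @staticmethod
    def convert_to_array(batch):
        return np.asarray(batch)

X_train = np.random.rand(100, 8)             # 100 samples, 8 features
y_train = np.random.randint(0, 2, size=100)  # binary labels
x_batch, y_batch = sample_true_x_y(32, X_train, y_train)
print(x_batch.shape, y_batch.shape)          # (32, 8) (32,)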
Example #2
def main():

    # Default data paths; an empty answer at any prompt keeps the default.
    xml_file_test = '../../data/Restaurants_Test_Gold.xml'
    xml_file_train = '../../data/Restaurants_Train.xml'
    yelp_path = '../../data/yelp_lexicon'
    brown_path = '../../data/brown_clusters_tweets/brown_clusters_tweets'

    xml_file_test = input("Please enter path to Restaurants_Test_Gold.xml file: ") or xml_file_test
    xml_file_train = input("Please enter path to Restaurants_Train.xml file: ") or xml_file_train
    yelp_path = input("Please enter path to yelp lexicon features folder: ") or yelp_path
    brown_path = input("Please enter path to brown_clusters_tweets file: ") or brown_path
    # Parse the restaurant-review XML files into DataFrames.
    df_test = xml_category_df(xml_file_test)
    df_train = xml_category_df(xml_file_train)

    df_train['stemmed_text'] = stem_series(df_train.text)
    df_test['stemmed_text'] = stem_series(df_test.text)

    # Score each text against the Yelp lexicon features.
    dict_list = load_lexicon_features(yelp_path)
    df_new_train = text_to_score(df_train.text, dict_list)
    df_new_test = text_to_score(df_test.text, dict_list)
    df_test, df_train = concat_dfs(df_test, df_new_test, df_train,
                                   df_new_train)

    # One-hot encode Brown-cluster membership for every text.
    test_texts = list(df_test.text)
    train_texts = list(df_train.text)
    cluster_dict = add_brown_clusters(brown_path)
    one_hot_test_list = [one_hotter(cluster_dict, text) for text in test_texts]
    one_hot_train_list = [one_hotter(cluster_dict, text) for text in train_texts]
    training_array_clusters = convert_to_array(one_hot_train_list)
    test_array_clusters = convert_to_array(one_hot_test_list)

    # Build n-gram features, attach per-label binary columns, and predict.
    X_train, y_train = n_gram_builder(df_train, df_test,
                                      training_array_clusters,
                                      test_array_clusters)
    df_train, df_test = populate_df_with_binary_cols_per_label(
        df_test, df_train)
    predict(X_train, y_train, df_train, df_test)
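To run this pipeline as a script, the usual entry-point guard applies (the helpers called above, such as xml_category_df, stem_series, and predict, are assumed to be defined in the same module):

if __name__ == '__main__':
    main()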
Example #3
def predict_sequence(test_x, test_y, seq_len, vocab_size, batch_size):
    avg_test_loss = []
    m_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    test_dataset_in = tf.data.Dataset.from_tensor_slices(test_x).batch(batch_size)
    test_dataset_out = tf.data.Dataset.from_tensor_slices(test_y).batch(batch_size)
    i = 0
    loaded_encoder = tf.keras.models.load_model(
        "data/generated_files/enc_model")
    loaded_generator = tf.keras.models.load_model(
        "data/generated_files/gen_model")
    true_x = list()
    true_y = list()
    predicted_y = list()
    for step, (x, y) in enumerate(zip(test_dataset_in, test_dataset_out)):
        batch_x_test = utils.convert_to_array(x)
        batch_y_test = utils.convert_to_array(y)
        # Skip the final ragged batch so tensor shapes stay fixed.
        if batch_x_test.shape[0] == batch_size:
            # Start tokens for the generator (used by the commented-out call below).
            new_tokens = tf.fill([batch_size, 1], 0)
            # Perturb the encoder state with Gaussian noise; enc_units is a
            # module-level constant matching the encoder's state size.
            noise = tf.random.normal((batch_size, enc_units))
            enc_output, enc_state = loaded_encoder(batch_x_test,
                                                   training=False)
            enc_state = tf.math.add(enc_state, noise)
            dec_state = enc_state
            #generated_logits, state = loaded_generator([new_tokens, enc_state], training=False)
            #loss = m_loss(batch_y_test, generated_logits)

            generated_logits, _, loss = gen_step_predict(
                seq_len, batch_size, vocab_size, loaded_generator, dec_state,
                batch_y_test)

            # Keep one sample (index 1) per batch for later inspection.
            p_y = tf.math.argmax(generated_logits, axis=-1)[1]
            one_x = utils.convert_to_string_list(batch_x_test[1])
            one_y = utils.convert_to_string_list(batch_y_test[1])
            pred_y = utils.convert_to_string_list(p_y)

            true_x.append(one_x)
            true_y.append(one_y)
            predicted_y.append(pred_y)

            print("Test: Batch {} loss: {}".format(i, loss))
            avg_test_loss.append(loss)
            i += 1
    true_predicted_df = pd.DataFrame(
        list(zip(true_x, true_y, predicted_y)),
        columns=["True_X", "True_Y", "Predicted_Y"])
    true_predicted_df.to_csv("data/generated_files/true_predicted_df.csv",
                             index=False)
    mean_loss = np.mean(avg_test_loss)
    print("Total test loss: {}".format(mean_loss))
    return mean_loss
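A hypothetical invocation, assuming integer-encoded sequences and a module-level enc_units that matches the saved encoder (all shapes and values below are illustrative, not from the original project):

import numpy as np

seq_len, vocab_size, batch_size = 50, 8000, 32
test_x = np.random.randint(0, vocab_size, (10 * batch_size, seq_len))
test_y = np.random.randint(0, vocab_size, (10 * batch_size, seq_len))
mean_loss = predict_sequence(test_x, test_y, seq_len, vocab_size, batch_size)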
Example #4
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions to minimize the negative log
    # marginal likelihood.  This is REALLY inefficient!
    x = convert_to_array(hyp)

    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=False, full_output=True)
        x, fx, funcCalls, gradcalls = aa[:4]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gvals = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fx, gvals, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=True, full_output=True)
        x, fvals, gvals, Bopt, funcCalls, gradcalls = aa[:6]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        return convert_to_class(x, hyp), fvals, gvals, funcCalls

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
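For this snippet to run, cg and bfgs must return SciPy's full-output tuples; a plausible set of imports (an assumption, since the original imports are not shown):

from scipy.optimize import fmin_cg as cg, fmin_bfgs as bfgs

With full_output=True, fmin_cg returns (xopt, fopt, func_calls, grad_calls, warnflag) and fmin_bfgs returns (xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag), which matches the tuple indices unpacked above.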
Example #5
def dnlml(x, F, *varargin):
    # Derivative of the negative log marginal likelihood: rebuild the
    # hyperparameter class from the flat array, call F with its trailing
    # flag forced to True (presumably "also return derivatives"), and
    # return the derivative output as a flat array.
    hyp = varargin[0]
    temp = list(varargin[1:][0])
    temp[-1] = True
    f = lambda z: F(z, *temp)
    X = convert_to_class(x, hyp)
    vargout = f(X)
    z = convert_to_array(vargout[1])
    return z
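The flag convention dnlml relies on, sketched with a toy objective (every name here is hypothetical, not from the original source):

def toy_objective(hyp_obj, scale, want_derivs):
    # When the trailing flag is True, also return derivatives; dnlml
    # forces that flag and keeps vargout[1].
    value = scale * sum(h * h for h in hyp_obj)
    derivs = [2.0 * scale * h for h in hyp_obj]
    return (value, derivs) if want_derivs else (value,)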
Example #6
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions, scg.py, or minimize.py to
    # minimize the negative log marginal likelihood.

    x = convert_to_array(hyp)   # convert the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=False, full_output=True)
        x, fopt, funcCalls, gradcalls = aa[:4]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gopt = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fopt, gopt, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=False, full_output=True)
        x, fopt, gopt, Bopt, funcCalls, gradcalls = aa[:6]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        if isinstance(fopt, ndarray):
            fopt = fopt[0]
        return convert_to_class(x, hyp), fopt, gopt, funcCalls

    elif Flag == 'SCG':
        # use scg.py
        aa = scg(x, nlml, dnlml, (F, hyp, varargin), niters=100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])

    elif Flag == 'Minimize':
        # use minimize.py
        aa = run(x, nlml, dnlml, (F, hyp, varargin), maxnumfuneval=-100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
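A hedged call sketch: the trailing element of *varargin must be the derivative flag that dnlml flips to True, and hyp0 stands in for whatever hyperparameter object convert_to_array and convert_to_class expect (toy_objective is the hypothetical objective sketched after Example #5; scg or the scipy wrappers must be in scope):

opt_hyp, fopt, gopt, n_evals = min_wrapper(hyp0, toy_objective, 'SCG', scale, False)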