def sample_true_x_y(batch_size, X_train, y_train):
    # Draw a random mini-batch (with replacement) from the training data.
    rand_batch_indices = np.random.randint(0, X_train.shape[0], batch_size)
    x_batch_train = X_train[rand_batch_indices]
    y_batch_train = y_train[rand_batch_indices]
    unrolled_x = utils.convert_to_array(x_batch_train)
    unrolled_y = utils.convert_to_array(y_batch_train)
    return unrolled_x, unrolled_y

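def _demo_sample_true_x_y():
    # Hypothetical smoke test for sample_true_x_y, not part of the original
    # code. It assumes utils.convert_to_array behaves like np.asarray; the
    # shapes and dtype below are illustrative only.
    X_train = np.zeros((100, 50), dtype=np.int32)  # 100 sequences of length 50
    y_train = np.zeros((100, 50), dtype=np.int32)
    batch_x, batch_y = sample_true_x_y(8, X_train, y_train)
    # Sampling is with replacement, so batches may repeat rows.
    assert batch_x.shape == (8, 50) and batch_y.shape == (8, 50)
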
def main():
    # Default data paths; each can be overridden interactively below.
    xml_file_test = '../../data/Restaurants_Test_Gold.xml'
    xml_file_train = '../../data/Restaurants_Train.xml'
    yelp_path = '../../data/yelp_lexicon'
    brown_path = '../../data/brown_clusters_tweets/brown_clusters_tweets'

    # An empty answer keeps the corresponding default path.
    xml_file_test_input = input(
        "Please enter path to Restaurants_Test_Gold.xml file: ")
    if xml_file_test_input:
        xml_file_test = xml_file_test_input

    xml_file_train_input = input(
        "Please enter path to Restaurants_Train.xml file: ")
    if xml_file_train_input:
        xml_file_train = xml_file_train_input

    yelp_path_input = input(
        "Please enter path to yelp lexicon features folder: ")
    if yelp_path_input:
        yelp_path = yelp_path_input

    brown_path_input = input(
        "Please enter path to brown_clusters_tweets file: ")
    if brown_path_input:
        brown_path = brown_path_input

    # Parse the restaurant review XML files and stem the raw text.
    df_test = xml_category_df(xml_file_test)
    df_train = xml_category_df(xml_file_train)
    df_train['stemmed_text'] = stem_series(df_train.text)
    df_test['stemmed_text'] = stem_series(df_test.text)

    # Score each text against the Yelp lexicon features.
    dict_list = load_lexicon_features(yelp_path)
    df_new_train = text_to_score(df_train.text, dict_list)
    df_new_test = text_to_score(df_test.text, dict_list)
    df_test, df_train = concat_dfs(df_test, df_new_test, df_train,
                                   df_new_train)

    # One-hot encode Brown cluster membership for every text.
    test_texts = list(df_test.text)
    train_texts = list(df_train.text)
    cluster_dict = add_brown_clusters(brown_path)
    one_hot_test_list = [one_hotter(cluster_dict, text) for text in test_texts]
    one_hot_train_list = [one_hotter(cluster_dict, text)
                          for text in train_texts]
    training_array_clusters = convert_to_array(one_hot_train_list)
    test_array_clusters = convert_to_array(one_hot_test_list)

    # Build n-gram features, add one binary column per label, and predict.
    X_train, y_train = n_gram_builder(df_train, df_test,
                                      training_array_clusters,
                                      test_array_clusters)
    df_train, df_test = populate_df_with_binary_cols_per_label(
        df_test, df_train)
    predict(X_train, y_train, df_train, df_test)

def predict_sequence(test_x, test_y, seq_len, vocab_size, batch_size):
    avg_test_loss = []
    m_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    test_dataset_in = tf.data.Dataset.from_tensor_slices(
        test_x).batch(batch_size)
    test_dataset_out = tf.data.Dataset.from_tensor_slices(
        test_y).batch(batch_size)
    i = 0
    # Restore the trained encoder and generator from disk.
    loaded_encoder = tf.keras.models.load_model(
        "data/generated_files/enc_model")
    loaded_generator = tf.keras.models.load_model(
        "data/generated_files/gen_model")
    true_x = list()
    true_y = list()
    predicted_y = list()
    for step, (x, y) in enumerate(zip(test_dataset_in, test_dataset_out)):
        batch_x_test = utils.convert_to_array(x)
        batch_y_test = utils.convert_to_array(y)
        # Skip the final, partial batch so tensor shapes stay fixed.
        if batch_x_test.shape[0] == batch_size:
            new_tokens = tf.fill([batch_size, 1], 0)
            # Perturb the encoder state with Gaussian noise before decoding;
            # enc_units is the module-level encoder hidden size.
            noise = tf.random.normal((batch_size, enc_units))
            enc_output, enc_state = loaded_encoder(batch_x_test,
                                                   training=False)
            enc_state = tf.math.add(enc_state, noise)
            dec_state = enc_state
            #generated_logits, state = loaded_generator([new_tokens, enc_state], training=False)
            #loss = m_loss(batch_y_test, generated_logits)
            generated_logits, _, loss = gen_step_predict(
                seq_len, batch_size, vocab_size, loaded_generator,
                dec_state, batch_y_test)
            # Keep one example per batch (index 1) for qualitative inspection.
            p_y = tf.math.argmax(generated_logits, axis=-1)[1]
            one_x = utils.convert_to_string_list(batch_x_test[1])
            one_y = utils.convert_to_string_list(batch_y_test[1])
            pred_y = utils.convert_to_string_list(p_y)
            true_x.append(one_x)
            true_y.append(one_y)
            predicted_y.append(pred_y)
            print("Test: Batch {} loss: {}".format(str(i), str(loss)))
            avg_test_loss.append(loss)
            i += 1
    true_predicted_df = pd.DataFrame(
        list(zip(true_x, true_y, predicted_y)),
        columns=["True_X", "True_Y", "Predicted_Y"])
    true_predicted_df.to_csv("data/generated_files/true_predicted_df.csv",
                             index=False)
    mean_loss = np.mean(avg_test_loss)
    print("Total test loss: {}".format(str(mean_loss)))
    return mean_loss

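def _demo_predict_sequence():
    # Hypothetical driver for predict_sequence, not part of the original code.
    # It assumes the trained models exist under data/generated_files/ and that
    # enc_units matches the saved encoder; all sizes below are illustrative.
    vocab_size, seq_len, batch_size = 8000, 50, 32
    test_x = np.random.randint(0, vocab_size, (4 * batch_size, seq_len))
    test_y = np.random.randint(0, vocab_size, (4 * batch_size, seq_len))
    mean_loss = predict_sequence(test_x, test_y, seq_len, vocab_size,
                                 batch_size)
    print("demo mean test loss:", mean_loss)
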
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions to minimize the negative log
    # marginal likelihood. This is REALLY inefficient!
    x = convert_to_array(hyp)
    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F, hyp, varargin), maxiter=100,
                disp=False, full_output=True)
        x = aa[0]
        fx = aa[1]
        funcCalls = aa[2]
        gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gvals = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fx, gvals, funcCalls
    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F, hyp, varargin), maxiter=100,
                  disp=True, full_output=True)
        x = aa[0]
        fvals = aa[1]
        gvals = aa[2]
        Bopt = aa[3]
        funcCalls = aa[4]
        gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        return convert_to_class(x, hyp), fvals, gvals, funcCalls
    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')

def dnlml(x, F, *varargin):
    hyp = varargin[0]
    temp = list(varargin[1:][0])
    temp[-1] = True
    f = lambda z: F(z, *temp)
    X = convert_to_class(x, hyp)
    vargout = f(X)
    z = convert_to_array(vargout[1])
    return z

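# dnlml re-runs F with the last positional argument (a derivative flag) forced
# to True, then flattens the gradient it returns. A minimal call sketch,
# assuming convert_to_array/convert_to_class are inverses over the
# hyperparameter container; the argument names below are hypothetical
# stand-ins for whatever F actually expects:
#
#     x = convert_to_array(hyp)        # hyperparameter class -> flat array
#     hyp2 = convert_to_class(x, hyp)  # flat array -> class, hyp as template
#     grad = dnlml(x, F, hyp, (mean, cov, lik, X, y, False))
#     # inside dnlml, the trailing False is flipped to True via temp[-1] = True
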
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions, scg.py, or minimize.py to
    # minimize the negative log marginal likelihood.
    x = convert_to_array(hyp)  # convert the hyperparameter class to an array
    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F, hyp, varargin), maxiter=100,
                disp=False, full_output=True)
        x = aa[0]
        fopt = aa[1]
        funcCalls = aa[2]
        gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gopt = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fopt, gopt, funcCalls
    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F, hyp, varargin), maxiter=100,
                  disp=False, full_output=True)
        x = aa[0]
        fopt = aa[1]
        gopt = aa[2]
        Bopt = aa[3]
        funcCalls = aa[4]
        gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        if isinstance(fopt, ndarray):
            fopt = fopt[0]
        return convert_to_class(x, hyp), fopt, gopt, funcCalls
    elif Flag == 'SCG':
        # use scg.py
        aa = scg(x, nlml, dnlml, (F, hyp, varargin), niters=100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])
    elif Flag == 'Minimize':
        # use minimize.py
        aa = run(x, nlml, dnlml, (F, hyp, varargin), maxnumfuneval=-100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])
    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
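
# A hedged usage sketch for min_wrapper. F is the objective callable whose
# trailing positional argument is the derivative flag (see dnlml above); the
# argument names are hypothetical stand-ins, not from the original code:
#
#     hyp_opt, fopt, gopt, n_evals = min_wrapper(
#         hyp, F, 'SCG', mean, cov, lik, X, y, False)
#     print("optimized NLML: %f after %d evaluations" % (fopt, n_evals))
#
# 'CG' and 'BFGS' dispatch to scipy.optimize (fmin_cg / fmin_bfgs), while
# 'SCG' and 'Minimize' use the local scg.py and minimize.py routines.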