import numpy as np
import tensorflow as tf
from tensorflow.contrib import framework, layers


def discriminator(src, tgt, opt, prefix='d_', is_prob_src=False, is_prob_tgt=False, is_reuse=None):
    W_norm_d = embedding_only(opt, prefix=prefix, is_reuse=is_reuse)  # V E
    H_src, _ = encoder(src, W_norm_d, opt, prefix=prefix + 'src_', is_reuse=is_reuse, is_prob=is_prob_src)
    H_tgt, x_tgt = encoder(tgt, W_norm_d, opt, prefix=prefix + 'tgt_', is_reuse=is_reuse,
                           is_prob=is_prob_tgt, is_padded=(opt.model == 'cnn_deconv'))  # H: B F
    if opt.is_subtract_mean:
        # Center both feature batches on the source mean.
        mean_H = tf.reduce_mean(H_src, axis=0)
        H_src = H_src - mean_H
        H_tgt = H_tgt - mean_H
    # Cosine similarity between source and target features, scaled by opt.L
    # unless pairwise feature matching is used.
    logits = tf.reduce_sum(normalizing(H_src, 1) * normalizing(H_tgt, 1), 1) * \
        (1 if opt.feature_matching == 'pair_diff' else opt.L)
    return logits, tf.squeeze(tf.concat([H_src, H_tgt], 1)), x_tgt
def pair_discriminator(src, tgt, opt, l_temp=1, prefix='d_', is_prob_src=False, is_prob_tgt=False, is_reuse=None):
    W_norm_d = embedding_only(opt, prefix=prefix, is_reuse=is_reuse)  # V E
    H_src, H_src_1 = encoder(src, W_norm_d, opt, l_temp=l_temp, prefix=prefix, is_reuse=is_reuse, is_prob=is_prob_src)
    H_tgt, H_tgt_1 = encoder(tgt, W_norm_d, opt, l_temp=l_temp, prefix=prefix, is_reuse=True, is_prob=is_prob_tgt)
    if opt.model == 'D':
        # Cosine similarity between the two feature vectors.
        logits = tf.reduce_sum(normalizing(H_src, 1) * normalizing(H_tgt, 1), 1)
    elif opt.model == 'C':
        # Two-layer classifier on the concatenated features.
        logits = classifier_2layer(tf.concat([H_src, H_tgt], 1), opt, prefix=prefix, is_reuse=is_reuse)
        logits = tf.squeeze(logits)
    else:  # 'N': shifted inner product
        logits = tf.reduce_sum(H_src * H_tgt - 0.5, 1)
    return logits, H_src, H_tgt, H_src_1, H_tgt_1
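# Note: `normalizing` is used throughout this file but defined elsewhere in the
# project. The sketch below is an assumed reference implementation
# (L2-normalization along a given axis), not necessarily the exact helper.
def normalizing(x, axis):
    # L2-normalize x along `axis`; the epsilon guards against division by zero.
    norm = tf.sqrt(tf.reduce_sum(tf.square(x), axis=axis, keep_dims=True) + 1e-12)
    return x / norm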
def embedding(features, opt, prefix='', is_reuse=None):
    """Transform batched word indices into embeddings."""
    with tf.variable_scope(prefix + 'embed', reuse=is_reuse):
        if opt.fix_emb:
            assert hasattr(opt, 'emb')
            assert np.shape(np.array(opt.emb)) == (opt.n_words, opt.embed_size)
            # Load the pretrained embedding and freeze it.
            W = tf.get_variable('W', initializer=opt.emb, trainable=False)
        else:
            weightInit = tf.random_uniform_initializer(-0.001, 0.001)
            W = tf.get_variable('W', [opt.n_words, opt.embed_size], initializer=weightInit)
        if hasattr(opt, 'relu_w') and opt.relu_w:
            W = tf.nn.relu(W)
    W_norm = normalizing(W, 1)  # row-normalize the embedding matrix
    word_vectors = tf.nn.embedding_lookup(W_norm, features)
    return word_vectors, W_norm
def embedding_only(opt, prefix='', is_reuse=None):
    """Return the normalized embedding matrix without performing a lookup."""
    with tf.variable_scope(prefix + 'embed', reuse=is_reuse):
        if opt.fix_emb:
            assert hasattr(opt, 'emb')
            assert np.shape(np.array(opt.emb)) == (opt.n_words, opt.embed_size)
            # Load the pretrained embedding and freeze it.
            W = tf.get_variable('W', initializer=opt.emb, trainable=False)
        else:
            if hasattr(opt, 'emb') and opt.emb:
                # Initialize from the pretrained embedding but keep it trainable.
                assert np.shape(np.array(opt.emb)) == (opt.n_words, opt.embed_size)
                W = tf.get_variable('W', initializer=opt.emb)
            else:
                # emb_init is an initializer assumed to be defined at module level.
                W = tf.get_variable('W', [opt.n_words, opt.embed_size], initializer=emb_init)
        if hasattr(opt, 'relu_w') and opt.relu_w:
            W = tf.nn.relu(W)
    W_norm = normalizing(W, 1)
    return W_norm
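# For context, a sketch of how the embedding helpers might be called. The `opt`
# attributes mirror the ones read above, but the concrete values here are
# illustrative assumptions, not the project's real configuration.
class Options(object):
    n_words = 5000      # vocabulary size (V)
    embed_size = 300    # embedding dimension (E)
    fix_emb = False     # learn the embedding matrix from scratch

opt = Options()
x = tf.placeholder(tf.int32, shape=[None, 40])   # batch of token ids (B x L)
word_vectors, W_norm = embedding(x, opt)         # (B x L x E), (V x E)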
def auto_encoder(x, x_org, opt, opt_t=None):
    # x: batch L (token ids)
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)   # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)    # batch L emb 1
    res = {}

    # CNN encoder
    if opt.layer == 4:
        H_enc = conv_model_4layer(x_emb, opt)
    elif opt.layer == 3:
        H_enc = conv_model_3layer(x_emb, opt)
    else:
        H_enc = conv_model(x_emb, opt)
    H_dec = H_enc

    # Deconvolutional decoder
    if opt.layer == 4:
        x_rec = deconv_model_4layer(H_dec, opt_t)   # batch L emb 1
    elif opt.layer == 3:
        x_rec = deconv_model_3layer(H_dec, opt_t)   # batch L emb 1
    else:
        x_rec = deconv_model(H_dec, opt_t)          # batch L emb 1
    print("Encoder len %d Decoder len %d Output len %d"
          % (x_emb.get_shape()[1], x_rec.get_shape()[1], x_org.get_shape()[1]))
    tf.assert_equal(x_rec.get_shape(), x_emb.get_shape())
    tf.assert_equal(x_emb.get_shape()[1], x_org.get_shape()[1])

    x_rec_norm = normalizing(x_rec, 2)  # batch L emb

    if opt.fix_emb:
        # Cosine-similarity reconstruction loss (plain L2 works poorly here).
        loss = -tf.reduce_sum(x_rec_norm * x_emb)
        rec_sent = tf.argmax(tf.tensordot(tf.squeeze(x_rec_norm), W_norm, [[2], [1]]), 2)
        res['rec_sents'] = rec_sent
    else:
        x_temp = tf.reshape(x_org, [-1, ])
        prob_logits = tf.tensordot(tf.squeeze(x_rec_norm), W_norm, [[2], [1]])  # c_blv = sum_e x_ble W_ve
        prob = tf.nn.log_softmax(prob_logits * opt_t.L, dim=-1, name=None)
        rec_sent = tf.squeeze(tf.argmax(prob, 2))
        prob = tf.reshape(prob, [-1, opt_t.n_words])

        # Gather the log-probabilities of the original and the generated words.
        idx = tf.range(opt.batch_size * opt_t.sent_len)
        all_idx = tf.transpose(tf.stack(values=[idx, x_temp]))
        all_prob = tf.gather_nd(prob, all_idx)
        gen_temp = tf.cast(tf.reshape(rec_sent, [-1, ]), tf.int32)
        gen_idx = tf.transpose(tf.stack(values=[idx, gen_temp]))
        gen_prob = tf.gather_nd(prob, gen_idx)

        res['rec_sents'] = rec_sent
        res['gen_p'] = tf.exp(gen_prob[0:opt.sent_len])
        res['all_p'] = tf.exp(all_prob[0:opt.sent_len])

        if opt.discrimination:
            # GAN-style loss: real sentences vs. the (sharpened) softmax outputs.
            logits_real, _ = discriminator(x_org, W_norm, opt_t)
            prob_one_hot = tf.nn.log_softmax(prob_logits * opt_t.L * 100, dim=-1, name=None)
            logits_syn, _ = discriminator(tf.exp(prob_one_hot), W_norm, opt_t, is_prob=True, is_reuse=True)
            res['prob_r'] = tf.reduce_mean(tf.nn.sigmoid(logits_real))
            res['prob_f'] = tf.reduce_mean(tf.nn.sigmoid(logits_syn))
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                       labels=tf.ones_like(logits_real), logits=logits_real)) + \
                   tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                       labels=tf.zeros_like(logits_syn), logits=logits_syn))
        else:
            # Negative log-likelihood of the original words.
            loss = -tf.reduce_mean(all_prob)

    tf.summary.scalar('loss', loss)
    train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                    optimizer='Adam', learning_rate=opt.lr)
    return res, loss, train_op
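# A sketch of the training loop that would drive auto_encoder. The x/x_org
# placeholders and the `batches` iterable of padded token-id arrays are
# assumptions about the surrounding script, not shown in this section.
tf.train.create_global_step()  # layers.optimize_loss reads this global step
res, loss, train_op = auto_encoder(x, x_org, opt)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch in batches:
        _, batch_loss = sess.run([train_op, loss], feed_dict={x: batch, x_org: batch})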
def deconv_decoder(H_dec, x_org, W_norm, is_train, opt, res, prefix='', is_reuse=None):
    # H_dec: batch 1 1 n_gan
    multiplier = opt.multiplier if hasattr(opt, 'multiplier') else 2

    if opt.layer == 4:
        x_rec = deconv_model_4layer(H_dec, opt, is_train=is_train, prefix=prefix, is_reuse=is_reuse)  # batch L emb 1
    elif opt.layer == 3:
        x_rec = deconv_model_3layer(H_dec, opt, is_train=is_train, multiplier=multiplier,
                                    prefix=prefix, is_reuse=is_reuse)  # batch L emb 1
    elif opt.layer == 0:
        x_rec = deconv_model_3layer(H_dec, opt, prefix=prefix, is_reuse=is_reuse)  # batch L emb 1
    else:
        x_rec = deconv_model(H_dec, opt, is_train=is_train, prefix=prefix, is_reuse=is_reuse)  # batch L emb 1
    print("Decoder len %d Output len %d" % (x_rec.get_shape()[1], x_org.get_shape()[1]))
    tf.assert_equal(x_rec.get_shape()[1], x_org.get_shape()[1])
    x_rec_norm = normalizing(x_rec, 2)  # batch L emb

    x_temp = tf.reshape(x_org, [-1, ])
    if hasattr(opt, 'attentive_emb') and opt.attentive_emb:
        # Learned per-dimension attention over the embedding before the vocab projection.
        emb_att = tf.get_variable(prefix + 'emb_att', [1, opt.embed_size],
                                  initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        prob_logits = tf.tensordot(tf.squeeze(x_rec_norm), emb_att * W_norm, [[2], [1]])  # c_blv = sum_e x_ble W_ve
    else:
        prob_logits = tf.tensordot(tf.squeeze(x_rec_norm), W_norm, [[2], [1]])  # c_blv = sum_e x_ble W_ve
    prob = tf.nn.log_softmax(prob_logits * opt.L, dim=-1, name=None)
    rec_sent = tf.squeeze(tf.argmax(prob, 2))
    prob = tf.reshape(prob, [-1, opt.n_words])

    # Gather the log-probabilities of the original and the generated words.
    idx = tf.range(opt.batch_size * opt.sent_len)
    all_idx = tf.transpose(tf.stack(values=[idx, x_temp]))
    all_prob = tf.gather_nd(prob, all_idx)
    gen_temp = tf.cast(tf.reshape(rec_sent, [-1, ]), tf.int32)
    gen_idx = tf.transpose(tf.stack(values=[idx, gen_temp]))
    gen_prob = tf.gather_nd(prob, gen_idx)

    res['rec_sents'] = rec_sent
    # res['gen_p'] = tf.exp(gen_prob[0:opt.sent_len])
    # res['all_p'] = tf.exp(all_prob[0:opt.sent_len])

    if opt.discrimination:
        # GAN-style loss: real sentences vs. the softmax outputs.
        logits_real, _ = discriminator(x_org, W_norm, opt)
        prob_one_hot = tf.nn.log_softmax(prob_logits * opt.L, dim=-1, name=None)
        logits_syn, _ = discriminator(tf.exp(prob_one_hot), W_norm, opt, is_prob=True, is_reuse=True)
        res['prob_r'] = tf.reduce_mean(tf.nn.sigmoid(logits_real))
        res['prob_f'] = tf.reduce_mean(tf.nn.sigmoid(logits_syn))
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                   labels=tf.ones_like(logits_real), logits=logits_real)) + \
               tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                   labels=tf.zeros_like(logits_syn), logits=logits_syn))
    else:
        # Negative log-likelihood of the original words.
        loss = -tf.reduce_mean(all_prob)
    return loss, res
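# A sketch of how deconv_decoder is presumably wired to a CNN encoder,
# mirroring auto_encoder above; the 3-layer encoder choice is illustrative.
x_emb, W_norm = embedding(x, opt)                         # B x L x E
H_enc = conv_model_3layer(tf.expand_dims(x_emb, 3), opt)  # B 1 1 n_gan
loss, res = deconv_decoder(H_enc, x_org, W_norm, is_train=True, opt=opt, res={})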
import numpy as np
import utils
import modules
from sklearn.model_selection import StratifiedKFold

# KNN hyperparameter search with repeated stratified k-fold cross-validation.
# X, y, and the candidate-neighbor grid N_K are assumed to be defined above.
weights = ['uniform', 'distance']
T = 10  # number of CV repetitions
k = 5   # number of folds
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=None)

# save the results
result = []
for t in range(T):
    for tr_index, v_index in skf.split(X, y):
        # training data
        X_tr = X[tr_index, :]
        y_tr = y[tr_index]
        # validation data
        X_val = X[v_index, :]
        y_val = y[v_index]
        # standardize
        (X_tr_norm, X_val_norm) = utils.normalizing(X_tr, X_val)
        # KNN on all features, over both weightings and every n in N_K
        result_L = []
        for w in weights:
            for n in N_K:
                result_l = modules.knn(n, w, X_tr_norm, y_tr, X_val_norm, y_val)
                result_L.append(result_l)
        result.append(result_L)

# Columns 2 and 3 of each result row hold the training and validation errors.
mean_Etrain = np.mean(np.array(result)[:, :, 2], axis=0)
var_Etrain = np.var(np.array(result)[:, :, 2], axis=0)
mean_Eval = np.mean(np.array(result)[:, :, 3], axis=0)
var_Eval = np.var(np.array(result)[:, :, 3], axis=0)

# The first 9 entries are 'uniform', the next 9 'distance' (assumes len(N_K) == 9).
best_n_uniform = N_K[np.argmin(mean_Eval[0:9])]
best_n_distance = N_K[np.argmin(mean_Eval[9:18])]
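# Optionally, visualize the search: mean validation error per candidate n with
# +/- 1 std error bars, one curve per weighting (assumes matplotlib and the
# 9-value N_K grid noted above).
import matplotlib.pyplot as plt

plt.errorbar(N_K, mean_Eval[0:9], yerr=np.sqrt(var_Eval[0:9]), label='uniform')
plt.errorbar(N_K, mean_Eval[9:18], yerr=np.sqrt(var_Eval[9:18]), label='distance')
plt.xlabel('n_neighbors')
plt.ylabel('mean validation error')
plt.legend()
plt.show()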
import numpy as np
import utils
import pickle
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

(X_training, y_training) = utils.load(
    "/Users/mac-pro/Desktop/20Fall/EE660/HW/Final/code/training.csv")
(X_test, y_test) = utils.load(
    "/Users/mac-pro/Desktop/20Fall/EE660/HW/Final/code/testing.csv")
(X_training_norm, X_test_norm) = utils.normalizing(X_training, X_test)

# load trained model
# https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/
filename = '/Users/mac-pro/Desktop/20Fall/EE660/HW/Final/code/final_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))

y_training_pred = loaded_model.predict(X_training_norm)
y_test_pred = loaded_model.predict(X_test_norm)
error_training = np.sqrt(mean_squared_error(y_training, y_training_pred))
error_test = np.sqrt(mean_squared_error(y_test, y_test_pred))

print("Final result with gradient boosting model:")
print("RMSE on training dataset = " + str(error_training))
print("RMSE on test dataset = " + str(error_test))
print("Feature importances:")
print(loaded_model.feature_importances_)
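# For completeness, the save step that presumably produced final_model.sav,
# following the pickle recipe linked above; the hyperparameters shown are
# placeholders, not the actual tuned values.
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
model.fit(X_training_norm, y_training)
pickle.dump(model, open(filename, 'wb'))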
# Lasso path over the regularization grid L (the etrain/etest lists and the
# standardized pretraining split are defined earlier in the script).
for l in L:
    (coef, y_train_pred, y_test_pred, error_train, error_test) = modules.lasso(
        l, Dpt_tr_std, ypt_tr, Dpt_test_std, ypt_test)
    etrain.append(error_train)
    etest.append(error_test)
plt.plot(np.log10(L), etrain, 'g', label="train")
plt.plot(np.log10(L), etest, 'b', label="test")
plt.legend()
plt.show()

'''
------------------------------------------------
nonlinear regression
'''
# normalize the pretraining data for the nonlinear models
(Dpt_tr_norm, Dpt_test_norm) = utils.normalizing(Dpt_tr, Dpt_test)

# Baseline: KNN with weights in ['uniform', 'distance'] and k = 1 to 9
for k in range(1, 10):
    (y_train_pred, y_test_pred, error_train, error_test) = modules.knn(
        k, 'distance', Dpt_tr_norm, ypt_tr, Dpt_test_norm, ypt_test)
    print("----")
    print(k)
    print(error_train, error_test)

'''
using ABM model (tree based)
'''
# CART: sweep min_impurity_decrease over [0.001, 0.1, 1, 10, 100, 1000] and plot
# to find the region that does not overfit; then try similar values around it,
# plot, and choose the simplest model within +/- 1 std of the best test error.
# A sketch of that sweep follows below.
min_impurity_decrease = 10
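# A sketch of the CART sweep described above, using sklearn's
# DecisionTreeRegressor directly (the modules wrapper presumably does
# something similar); the grid and the RMSE metric are assumptions.
from sklearn.tree import DecisionTreeRegressor

for mid in [0.001, 0.1, 1, 10, 100, 1000]:
    tree = DecisionTreeRegressor(min_impurity_decrease=mid, random_state=0)
    tree.fit(Dpt_tr_norm, ypt_tr)
    e_tr = np.sqrt(mean_squared_error(ypt_tr, tree.predict(Dpt_tr_norm)))
    e_te = np.sqrt(mean_squared_error(ypt_test, tree.predict(Dpt_test_norm)))
    print(mid, e_tr, e_te)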