def check_val_stats(model, pred_opt, data, hparams, X_ph, Y_ph, exp, sess, epoch):
    """
    Runs through validation data to check the overall mean loss.

    :param model: loss tensor evaluated for the validation error
    :param pred_opt: prediction op used for precision@k
    :param data: dataset object exposing ``val_generator``
    :param hparams: hyperparameter namespace (uses ``batch_size``)
    :param X_ph: placeholder fed with the context batch
    :param Y_ph: placeholder fed with the utterance batch
    :param exp: test-tube Experiment used to log metrics
    :param sess: active TF session
    :param epoch: zero-based epoch index (logged as ``epoch + 1``)
    :return: None
    """
    print('checking val loss...')
    max_val_batches = 100
    val_gen = data.val_generator(batch_size=hparams.batch_size, max_epochs=1)

    overall_err = []
    overall_p_1 = []
    overall_p_2 = []
    progbar = Progbar(target=max_val_batches, width=50)
    for batch_nb in range(max_val_batches):
        # FIX: the generator runs a single epoch, so it may be exhausted
        # before max_val_batches are served -- stop instead of crashing
        # with an unhandled StopIteration.
        try:
            batch_X, batch_Y = next(val_gen)
        except StopIteration:
            break
        if len(batch_X) == 0:
            continue

        # aggregate data
        feed_dict = {
            X_ph: batch_X,
            Y_ph: batch_Y
        }

        # calculate metrics
        val_err = model.eval(session=sess, feed_dict=feed_dict)
        precission_at_1 = test_precision_at_k(pred_opt, feed_dict, k=1, sess=sess)
        precission_at_2 = test_precision_at_k(pred_opt, feed_dict, k=2, sess=sess)

        # track metrics for means
        overall_err.append(val_err)
        overall_p_1.append(precission_at_1)
        overall_p_2.append(precission_at_2)

        # update exp and progbar
        exp.add_metric_row({'val loss': val_err,
                            'val P@1': precission_at_1,
                            'val P@2': precission_at_2})
        progbar.add(n=1)

    # FIX: guard against an empty run -- np.mean of an empty array is NaN
    # and emits a RuntimeWarning, which would then be logged as a metric.
    if not overall_err:
        print('\nno validation batches were served, skipping val stats')
        return

    # log and save val metrics
    overall_val_mean_err = np.asarray(overall_err).mean()
    overall_p_1_mean = np.asarray(overall_p_1).mean()
    overall_p_2_mean = np.asarray(overall_p_2).mean()
    exp.add_metric_row({'epoch_mean_err': overall_val_mean_err,
                        'epoch_P@1_mean': overall_p_1_mean,
                        'epoch_P@2_mean': overall_p_2_mean,
                        'epoch': epoch + 1})
    print('\nval loss: ', overall_val_mean_err,
          'epoch_P@1_mean: ', overall_p_1_mean,
          'epoch_P@2_mean: ', overall_p_2_mean)
    print('-'*100)
def check_val_stats(model, pred_opt, data, hparams, X_ph, Y_ph, exp, sess, epoch):
    """
    Runs through up to 100 validation batches and logs per-batch and
    epoch-mean loss / precision@1 / precision@2 to the experiment.

    :param model: loss tensor evaluated for the validation error
    :param pred_opt: prediction op used for precision@k
    :param data: dataset object exposing ``val_generator``
    :param hparams: hyperparameter namespace (uses ``batch_size``)
    :param X_ph: placeholder fed with the context batch
    :param Y_ph: placeholder fed with the utterance batch
    :param exp: test-tube Experiment used to log metrics
    :param sess: active TF session
    :param epoch: zero-based epoch index (logged as ``epoch + 1``)
    :return: None
    """
    print('checking val loss...')
    max_val_batches = 100
    val_gen = data.val_generator(batch_size=hparams.batch_size, max_epochs=1)
    overall_err = []
    overall_p_1 = []
    overall_p_2 = []
    progbar = Progbar(target=max_val_batches, width=50)
    for batch_nb in range(max_val_batches):
        # FIX: the generator is limited to one epoch and may run dry before
        # 100 batches; break cleanly rather than raise StopIteration.
        try:
            batch_X, batch_Y = next(val_gen)
        except StopIteration:
            break
        if len(batch_X) == 0:
            continue
        feed_dict = {X_ph: batch_X, Y_ph: batch_Y}
        val_err = model.eval(session=sess, feed_dict=feed_dict)
        precission_at_1 = test_precision_at_k(pred_opt, feed_dict, k=1, sess=sess)
        precission_at_2 = test_precision_at_k(pred_opt, feed_dict, k=2, sess=sess)
        overall_err.append(val_err)
        overall_p_1.append(precission_at_1)
        overall_p_2.append(precission_at_2)
        exp.add_metric_row({
            'val loss': val_err,
            'val P@1': precission_at_1,
            'val P@2': precission_at_2
        })
        progbar.add(n=1)
    # FIX: avoid NaN means (plus RuntimeWarning) when nothing was served
    if not overall_err:
        print('\nno validation batches were served, skipping val stats')
        return
    overall_val_mean_err = np.asarray(overall_err).mean()
    overall_p_1_mean = np.asarray(overall_p_1).mean()
    overall_p_2_mean = np.asarray(overall_p_2).mean()
    exp.add_metric_row({
        'epoch_mean_err': overall_val_mean_err,
        'epoch_P@1_mean': overall_p_1_mean,
        'epoch_P@2_mean': overall_p_2_mean,
        'epoch': epoch + 1
    })
    print('\nval loss: ', overall_val_mean_err, 'epoch_P@1_mean: ',
          overall_p_1_mean, 'epoch_P@2_mean: ', overall_p_2_mean)
    print('-' * 100)
def train_main(hparams):
    """
    Main training routine for the dot semantic network bot.

    Builds the TF graph (embedding lookup -> summed example vectors ->
    dot-product scoring with a log-sum-exp softmax loss), then trains for
    ``hparams.nb_epochs`` epochs, logging train/val metrics to the
    test-tube Experiment and checkpointing each epoch.

    :param hparams: argparse-style namespace of hyperparameters
    :return: None
    """
    # -----------------------
    # INIT EXPERIMENT
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)
    exp.add_argparse_meta(hparams)
    exp.save()

    # -----------------------
    # LOAD DATASET
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # -----------------------
    # INIT TF VARS
    # ----------------------
    # input_x holds chat history (token ids)
    # input_y holds our responses (token ids)
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=[hparams.batch_size, None],
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32,
                             shape=[hparams.batch_size, None],
                             name='input_y')

    # ----------------------
    # EMBEDDING LAYER
    # ----------------------
    # you can preload your own or learn in the network
    # in this case we'll just learn it in the network
    embedding = tf.get_variable(
        'embedding', [udc_dataset.vocab_size, hparams.embedding_dim])

    # ----------------------
    # RESOLVE EMBEDDINGS
    # ----------------------
    # Lookup the embeddings.
    embedding_x = tf.nn.embedding_lookup(embedding, input_x)
    embedding_y = tf.nn.embedding_lookup(embedding, input_y)

    # Generates 1 vector per training example (sum over the time axis).
    x = tf.reduce_sum(embedding_x, axis=1)
    y = tf.reduce_sum(embedding_y, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    # S[i][j] scores context i against utterance j; the diagonal holds the
    # matching pairs, so the loss is a softmax over each row with the
    # diagonal as the target (log-sum-exp trick for stability).
    S = dot_product_scoring(x, y, is_training=True)
    K = tf.reduce_logsumexp(S, axis=1)
    loss = -tf.reduce_mean(tf.diag_part(S) - K)

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=loss)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS)
    # ----------------------
    sess = tf.Session()
    init_vars = tf.global_variables_initializer()
    sess.run(init_vars)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # ----------------------
    # TRAINING ROUTINE
    # ----------------------
    # admin vars
    nb_batches_served = 0
    eval_every_n_batches = hparams.eval_every_n_batches
    # placeholder values shown in the progress bar until the first eval
    train_err = 1000
    prec_at_1 = 0
    prec_at_2 = 0

    # iter for the needed epochs
    print('\n\n', '-' * 100,
          '\n {} TRAINING\n'.format(hparams.exp_name.upper()),
          '-' * 100, '\n\n')
    for epoch in range(hparams.nb_epochs):
        print('training epoch:', epoch + 1)
        progbar = Progbar(target=udc_dataset.nb_tng, width=50)
        train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size,
                                                max_epochs=1)

        # mini batches
        for batch_context, batch_utterance in train_gen:
            feed_dict = {input_x: batch_context, input_y: batch_utterance}

            # OPT: run one step of optimization
            optimizer.run(session=sess, feed_dict=feed_dict)

            # update loss metrics every eval_every_n_batches steps
            if nb_batches_served % eval_every_n_batches == 0:
                # calculate test error
                train_err = loss.eval(session=sess, feed_dict=feed_dict)
                prec_at_1 = test_precision_at_k(S, feed_dict, k=1, sess=sess)
                prec_at_2 = test_precision_at_k(S, feed_dict, k=2, sess=sess)

                # update prog bar
                exp.add_metric_row({
                    'tng loss': train_err,
                    'P@1': prec_at_1,
                    'P@2': prec_at_2
                })
            nb_batches_served += 1
            progbar.add(n=len(batch_context),
                        values=[('train_err', train_err),
                                ('P@1', prec_at_1),
                                ('P@2', prec_at_2)])

        # ----------------------
        # END OF EPOCH PROCESSING
        # ----------------------
        # calculate the val loss
        print('\nepoch complete...\n')
        check_val_stats(loss, S, udc_dataset, hparams, input_x, input_y,
                        exp, sess, epoch)

        # save model
        save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)

        # save exp data
        exp.save()

    # NOTE(review): resets the default graph after training so repeated
    # calls don't accumulate ops; the session itself is never closed --
    # presumably relying on process exit. Confirm if reuse is intended.
    tf.reset_default_graph()
def main():
    """
    Train a WGAN-GP image generator.

    Builds two Keras graphs over shared ``gen``/``dis`` networks -- one that
    updates only the generator (Wasserstein loss on fooling the critic) and
    one that updates only the critic (real/fake Wasserstein losses plus a
    gradient penalty on random interpolates). Trains the critic
    ``max_train_only_dis`` times per generator step, and every 100 epochs
    saves a tiled preview image to ``generate/<epoch>.png``.

    :return: None
    """
    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    lamb = 10.0  # gradient-penalty weight from the WGAN-GP paper

    train = get_data()
    train_images, train_labels = make_batch(train)

    gen = generator()
    dis = discriminator()
    gen.summary()
    dis.summary()

    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    # generator training graph: critic frozen
    gen.trainable = True
    dis.trainable = False
    gen_inputs = Input(shape=(noise_dim, ))
    gen_outputs = gen(gen_inputs)
    dis_outputs = dis(gen_outputs)
    gen_model = Model(inputs=[gen_inputs], outputs=[dis_outputs])
    gen_model.compile(loss=wasserstein_loss, optimizer=gen_opt)
    gen_model.summary()

    # critic training graph: generator frozen
    gen.trainable = False
    dis.trainable = True
    real_inputs = Input(shape=train_images.shape[1:])
    dis_real_outputs = dis(real_inputs)
    fake_inputs = Input(shape=(noise_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)
    interpolate = RandomWeightedAverage()([real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)

    gp_reg = partial(gradient_penalty, interpolate=interpolate, lamb=lamb)
    # FIX: Keras reads loss.__name__ while compiling/naming outputs, and a
    # functools.partial object has no __name__ -- without this line the
    # compile() below raises AttributeError (the sibling implementation in
    # this project sets it too).
    gp_reg.__name__ = 'gradient_penalty'

    dis_model = Model(inputs=[real_inputs, fake_inputs],
                      outputs=[dis_real_outputs, dis_fake_outputs,
                               dis_interpolate_outputs])
    dis_model.compile(loss=[wasserstein_loss, wasserstein_loss, gp_reg],
                      optimizer=dis_opt)
    dis_model.summary()

    max_epoch = 10001
    max_train_only_dis = 5  # critic updates per generator update
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)

    real = np.zeros((batch_size, train_images.shape[1], train_images.shape[2],
                     train_images.shape[3]), dtype=np.float32)
    minibatch_train_images = np.zeros(
        (minibatch_size, train_images.shape[1], train_images.shape[2],
         train_images.shape[3]), dtype=np.float32)
    progbar = Progbar(target=max_epoch)

    # Wasserstein labels: -1 real, +1 fake, 0 dummy target for the penalty term
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size

    for epoch in range(max_epoch):
        np.random.shuffle(train_images)
        for loop in range(max_loop):
            minibatch_train_images = train_images[loop * minibatch_size:(loop + 1) *
                                                  minibatch_size]
            # critic-only steps
            for train_only_dis in range(max_train_only_dis):
                real = minibatch_train_images[train_only_dis *
                                              batch_size:(train_only_dis + 1) *
                                              batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                dis_loss = dis_model.train_on_batch(
                    [real, noise], [real_label, fake_label, dummy_label])
            # one generator step
            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            gen_loss = gen_model.train_on_batch(noise, real_label)
        # progbar target is max_epoch, so tick once per epoch
        progbar.add(1, values=[("dis_loss", dis_loss[0]),
                               ("gen_loss", gen_loss)])
        if epoch % 100 == 0:
            # FIX: sample with noise_dim (was hard-coded 10, which only
            # worked when _NOISE_DIM happened to be 10)
            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(np.float32)
            fake = gen.predict(noise)
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            # images are in [-1, 1]; rescale to [0, 255] for saving
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))
    backend.clear_session()
def main():
    """
    Train a conditional W-GAN-GP on materials data.

    Loads (or downloads and caches) the training table, normalizes data and
    labels to [0, 1], builds an AC-GAN-style network whose auxiliary head is
    a regressor (not a classifier), trains with the W-GAN-GP objective, and
    finally saves generated samples and model weights.

    :return: None
    """
    if os.path.isfile(macro._LOCAL_SAVE_DATA) == 0:
        # download data and compute features (see "download_data.py")
        # atomic_numbers is used to compute the composition vector
        # labels is the target property (formation energy)
        train_labels, compositions, features, atomic_numbers = dl.get_data()
        # compute bag-of-atom vector that trains GAN (see "preprocess.py")
        boa_vectors = pre.compute_bag_of_atom_vector(compositions, atomic_numbers)
        train_data = np.concatenate([boa_vectors, features], axis=1)
        save_data = pd.DataFrame(
            np.concatenate([train_labels, train_data], axis=1))
        save_data.to_csv(macro._LOCAL_SAVE_DATA, index=False, header=False)
    else:
        data = pd.read_csv(macro._LOCAL_SAVE_DATA,
                           delimiter=',',
                           engine="python",
                           header=None)
        data = np.array(data)
        # first column is the label, the remainder are the features
        train_labels, train_data = np.split(data, [1], axis=1)

    # normalization of training data such that min is 0 and max is 1 (see "preprocess.py")
    normalized_train_data, data_max, data_min = pre.normalize_for_train(
        train_data)
    normalized_train_labels, max_train_prop, min_train_prop = pre.normalize_for_train(
        train_labels)
    # Save normalization parameters to .csv so generation can denormalize later
    save_data = pd.DataFrame(
        np.concatenate([max_train_prop, min_train_prop, data_max, data_min],
                       axis=0))
    save_data.to_csv(macro._SAVE_NORMALIZATION_PARAM, index=False, header=False)

    ### start initialization of training GAN ###
    # set hyperparameters
    batch_size = macro._BATCH_SIZE  # batch size
    noise_dim = macro._NOISE_DIM  # dimension of noise to input generator
    property_dim = macro._PROP_DIM  # the number of properties
    lamb = macro._LAMB  # hyperparameter for W-GAN-GP
    max_epoch = macro._MAX_EPOCH  # maximum iteration of outer loop
    # maximum iteration of inner loop defined by the W-GAN-GP paper
    # (https://arxiv.org/pdf/1704.00028.pdf)
    max_train_only_dis = macro._MAX_EPOCH_TRAIN_DISCRIMINATOR
    max_loop = int(train_data.shape[0] / batch_size)

    # set model (see "model.py")
    # in this code, we apply AC-GAN based network architecture
    # (https://arxiv.org/abs/1610.09585); the difference from AC-GAN is that
    # our auxiliary head is a regression, not a classification
    gen = model.generator(normalized_train_data.shape[1])
    dis = model.discriminator(normalized_train_data.shape[1])
    # rf is the output layer of discriminator that discriminates real or fake
    rf = model.real_fake()
    # pred is the output layer of discriminator that predicts target property
    pred = model.prediction()

    # set optimization method
    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    # first set discriminator's parameters for training
    gen.trainable = False  # generator's parameters do not update
    dis.trainable = True
    rf.trainable = True
    pred.trainable = True

    # set variables when inputting real data
    real_inputs = Input(shape=normalized_train_data.shape[1:])
    dis_real_outputs = dis(real_inputs)
    real_fake_from_real = rf(dis_real_outputs)
    predictions_from_real = pred(dis_real_outputs)

    # set variables when inputting fake data (noise concatenated with property)
    fake_inputs = Input(shape=(noise_dim + property_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)
    real_fake_from_fake = rf(dis_fake_outputs)

    # set loss function for discriminator
    # we apply a W-GAN-GP based loss for stability
    # (https://arxiv.org/pdf/1704.00028.pdf); W-GAN-GP is unsupervised while
    # our model is supervised (conditional), so wasserstein_loss drives the
    # real/fake head and mean_squared_error drives the prediction head
    interpolate = model.RandomWeightedAverage()(
        [real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)
    real_fake_interpolate = rf(dis_interpolate_outputs)

    # gradient penalty of W-GAN-GP; Keras needs a __name__ on the partial
    gp_reg = partial(model.gradient_penalty, interpolate=interpolate, lamb=lamb)
    gp_reg.__name__ = 'gradient_penalty'

    # connect inputs and outputs of the discriminator
    # prediction part is trained only on real training data (i.e. the predict
    # head is not trained on generated samples)
    dis_model = Model(inputs=[real_inputs, fake_inputs],\
        outputs=[real_fake_from_real, real_fake_from_fake, real_fake_interpolate, predictions_from_real])
    # compile
    dis_model.compile(loss=[model.wasserstein_loss,model.wasserstein_loss,\
        gp_reg,'mean_squared_error'],optimizer=dis_opt)

    # second set generator's parameters for training
    gen.trainable = True  # generator's parameters only update
    dis.trainable = False
    rf.trainable = False
    pred.trainable = False

    # set variables when inputting noise and target property
    gen_inputs = Input(shape=(noise_dim + property_dim, ))
    gen_outputs = gen(gen_inputs)

    # set variables for discriminator when inputting fake data
    dis_outputs = dis(gen_outputs)
    real_fake = rf(dis_outputs)
    predictions = pred(dis_outputs)

    # connect inputs and outputs of the generator-training graph
    gen_model = Model(inputs=[gen_inputs], outputs=[real_fake, predictions])
    # compile: generator is trained by real/fake classification and by
    # prediction of the target property
    gen_model.compile(loss=[model.wasserstein_loss, 'mean_squared_error'],
                      optimizer=gen_opt)

    # if you need progress bar
    progbar = Progbar(target=max_epoch)

    # set the answers used to train each model (-1 real, +1 fake, 0 dummy
    # target for the gradient-penalty output)
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size
    #real = np.zeros((batch_size,train_data.shape[1]), dtype=np.float32)
    # reusable buffer holding noise concatenated with the conditioning property
    inputs = np.zeros((batch_size, noise_dim + property_dim), dtype=np.float32)

    # epoch
    for epoch in range(max_epoch):
        # iteration
        for loop in range(max_loop):
            # shuffle to change the training order and select data
            # NOTE(review): reshuffling inside the loop means each slice is
            # drawn from a freshly shuffled copy, so batches may overlap
            # within an epoch -- confirm this sampling is intended
            sdata, slabels, bak = pre.paired_shuffle(normalized_train_data,
                                                     normalized_train_labels)
            real = sdata[loop * batch_size:(loop + 1) * batch_size]
            properties = slabels[loop * batch_size:(loop + 1) * batch_size]

            # generator's parameters do not update
            gen.trainable = False
            dis.trainable = True
            rf.trainable = True
            pred.trainable = True

            # train discriminator
            for train_only_dis in range(max_train_only_dis):
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                for i in range(len(noise)):
                    inputs[i] = np.hstack((noise[i], properties[i]))
                dis_loss = dis_model.train_on_batch(
                    [real, inputs],
                    [real_label, fake_label, dummy_label, properties])

            # second train only generator
            gen.trainable = True
            dis.trainable = False
            rf.trainable = False
            pred.trainable = False
            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            for i in range(len(noise)):
                inputs[i] = np.hstack((noise[i], properties[i]))
            gen_loss = gen_model.train_on_batch([inputs],
                                                [real_label, properties])
        # if you need progress bar
        # NOTE(review): placed at epoch level to match target=max_epoch --
        # the collapsed original is ambiguous about this nesting; confirm
        progbar.add(1, values=[("dis_loss", dis_loss[0]),
                               ("gen_loss", gen_loss[0])])

    # save generated samples and models
    # NOTE(review): `eval` here is a project module that shadows the builtin
    eval.save(normalized_train_data, gen, dis, pred, rf)
    backend.clear_session()
def main():
    """
    Train a simple GAN with binary cross-entropy (non-Wasserstein variant).

    The discriminator is trained on concatenated real+fake batches labelled
    1/0, while the combined model trains the generator to make the
    discriminator output 1. Every 100 epochs a tiled preview image is saved
    to ``generate/<epoch>.png``.

    :return: None
    """
    train = get_data()
    train_images, train_labels = make_batch(train)

    dis = discriminator()
    dis.summary()
    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    dis.compile(loss='binary_crossentropy', optimizer=dis_opt)

    gen = generator()
    gen.summary()
    # freeze dis inside the combined model so only gen updates there
    # NOTE(review): flipping `trainable` after dis.compile relies on Keras
    # snapshotting trainability at compile time -- confirm the Keras version
    # used here behaves as intended
    gen.trainable = True
    dis.trainable = False
    comb = combine(gen, dis)
    comb.summary()
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    comb.compile(loss='binary_crossentropy', optimizer=gen_opt)

    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    max_epoch = 10001
    max_train_only_dis = 5
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)

    real = np.zeros((batch_size, train_images.shape[1], train_images.shape[2],
                     train_images.shape[3]), dtype=np.float32)
    minibatch_train_images = np.zeros(
        (minibatch_size, train_images.shape[1], train_images.shape[2],
         train_images.shape[3]), dtype=np.float32)
    progbar = Progbar(target=max_epoch)
    # NOTE(review): real_label/fake_label are assigned but never used in this
    # variant (the `y` lists below are used instead)
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size

    for epoch in range(max_epoch):
        np.random.shuffle(train_images)
        for loop in range(max_loop):
            minibatch_train_images = train_images[loop * minibatch_size:(loop + 1) *
                                                  minibatch_size]
            # NOTE(review): despite the loop name, BOTH networks are trained
            # on every inner iteration (gen and dis each get
            # max_train_only_dis updates per minibatch) -- confirm intended
            for train_only_dis in range(max_train_only_dis):
                real = minibatch_train_images[train_only_dis *
                                              batch_size:(train_only_dis + 1) *
                                              batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)

                # generator step: target 1 = "fool the discriminator"
                dis.trainable = False
                y = [1] * batch_size
                gen_loss = comb.train_on_batch(noise, y)

                # discriminator step on real (1) + fake (0) concatenated
                dis.trainable = True
                y = [1] * batch_size + [0] * batch_size
                fake = gen.predict(noise)
                dis_loss = dis.train_on_batch(np.concatenate((real, fake)), y)
            # progbar target is max_epoch; placement at loop level over-counts
            # NOTE(review): collapsed original is ambiguous here -- ticking
            # once per epoch to match target=max_epoch; confirm
        progbar.add(1, values=[("dis_loss", dis_loss),
                               ("gen_loss", gen_loss)])
        if epoch % 100 == 0:
            # NOTE(review): `fake` is unbound here if max_loop == 0
            # (dataset smaller than one minibatch)
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            # samples are in [-1, 1]; rescale to [0, 255] bytes
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))
    backend.clear_session()
def train_main(hparams):
    """
    Main training routine for the dot semantic network bot (ELMo/MyLayer
    variant operating on raw strings rather than token ids).

    Each of the batch_size string slots is embedded via its own MyLayer
    instance and the results are concatenated back into a batch tensor,
    which feeds the dot-semantic network.

    :param hparams: argparse-style namespace of hyperparameters
    :return: None
    """
    # -----------------------
    # INIT EXPERIMENT
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)
    exp.add_meta_tags(vars(hparams))

    # -----------------------
    # LOAD DATASET
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # -----------------------
    # INIT TF VARS
    # ----------------------
    # context holds chat history, utterance holds our responses -- both as
    # raw strings, one per batch slot
    context_ph = tf.placeholder(dtype="string",
                                shape=[
                                    hparams.batch_size,
                                ],
                                name='context_seq_in')
    utterance_ph = tf.placeholder(dtype="string",
                                  shape=[
                                      hparams.batch_size,
                                  ],
                                  name='utterance_seq_in')

    # ----------------------
    # EMBEDDING LAYER
    # ----------------------
    # you can preload your own or learn in the network
    # embedding_layer = tf.Variable(tf.random_uniform([udc_dataset.vocab_size, hparams.embedding_dim], -1.0, 1.0), name='embedding')
    #x = prep(udc_dataset.train, hparams.batch_size)
    #print(type(x))
    #print(len(x))
    # elmo_model = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
    # NOTE(review): this session is created and initialized BEFORE the
    # MyLayer variables below exist; a second session is created later and
    # re-runs the initializer -- confirm the first one is actually needed
    sess = tf.Session()
    K.set_session(sess)
    # Initialize sessions
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    # print('elmo')
    # context = list(udc_dataset['Context'])
    # elmo_text = elmo(context, signature="default", as_dict=True)
    # input_text = Input(shape=(100,), tensor= ,dtype="string")
    #custom_layer = MyLayer(output_dim=1024, trainable=True)(tf.convert_to_tensor(x, dtype='string'))
    # embedding = Lambda(ELMoEmbedding, output_shape=(1024, ))(input_text)
    # elmo_text = elmo(tf.squeeze(tf.cast(x, tf.string)), signature="default", as_dict=True)["default"]
    #embedding_layer = Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001))(custom_layer)
    print('embedding_layer')

    # ----------------------
    # RESOLVE EMBEDDINGS
    # ----------------------
    # look up embeddings: slot 0 seeds the batch tensors, the loop below
    # appends one 1024-dim embedding per remaining batch slot
    # NOTE(review): each slot gets its OWN MyLayer instance (separate
    # weights per slot) -- confirm weight sharing isn't intended
    context_embedding_custom0 = MyLayer(output_dim=1024,
                                        trainable=True)(tf.slice(
                                            context_ph, [0], [1]))
    utterance_embedding_custom0 = MyLayer(output_dim=1024,
                                          trainable=True)(tf.slice(
                                              utterance_ph, [0], [1]))
    print('context')
    print(tf.shape(context_embedding_custom0))
    for batch_num in range(1, hparams.batch_size):
        context_embedding_custom = MyLayer(output_dim=1024,
                                           trainable=True)(tf.slice(
                                               context_ph, [batch_num], [1]))
        utterance_embedding_custom = MyLayer(output_dim=1024, trainable=True)(
            tf.slice(utterance_ph, [batch_num], [1]))
        context_embedding_custom0 = tf.concat(
            [context_embedding_custom0, context_embedding_custom], axis=0)
        utterance_embedding_custom0 = tf.concat(
            [utterance_embedding_custom0, utterance_embedding_custom], axis=0)
    print('concat')
    print(tf.shape(context_embedding_custom0))
    #context_embedding_summed = tf.reduce_mean(context_embedding_custom0, axis=1)
    #utterance_embedding_summed = tf.reduce_mean(utterance_embedding_custom0, axis=1)
    #print('summed')
    #print(tf.shape(context_embedding_summed))
    #context_embedding = Dense(hparams.embedding_dim, activation='relu',
    #kernel_regularizer=keras.regularizers.l2(0.001))(
    #context_embedding_custom0)
    #utterance_embedding = Dense(hparams.embedding_dim, activation='relu',
    #kernel_regularizer=keras.regularizers.l2(0.001))(
    #utterance_embedding_custom0)
    #print('embedding')
    #print(tf.shape(context_embedding))
    # avg all embeddings (sum works better?)
    # this generates 1 vector per training example
    #context_embedding_summed = tf.reduce_mean(context_embedding, axis=1)
    #utterance_embedding_summed = tf.reduce_mean(utterance_embedding, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    model, _, _, pred_opt = dot_semantic_nn(
        context=context_embedding_custom0,
        utterance=utterance_embedding_custom0,
        tng_mode=hparams.train_mode)

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=model)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS)
    # ----------------------
    # second session: initializes ALL variables, including the MyLayer
    # weights created after the first initializer ran
    sess = tf.Session()
    init_vars = tf.global_variables_initializer()
    sess.run(init_vars)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # ----------------------
    # TRAINING ROUTINE
    # ----------------------
    # admin vars
    nb_batches_served = 0
    eval_every_n_batches = hparams.eval_every_n_batches
    # placeholder values shown in the progress bar until the first eval
    train_err = 1000
    precission_at_1 = 0
    precission_at_2 = 0

    # iter for the needed epochs
    print('\n\n', '-' * 100,
          '\n {} TRAINING\n'.format(hparams.exp_name.upper()),
          '-' * 100, '\n\n')
    for epoch in range(hparams.nb_epochs):
        print('training epoch:', epoch + 1)
        progbar = Progbar(target=udc_dataset.nb_tng, width=50)
        train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size,
                                                max_epochs=1)

        # mini batches
        for batch_context, batch_utterance in train_gen:
            feed_dict = {
                context_ph: batch_context,
                utterance_ph: batch_utterance
            }
            print("optimizer!")
            # OPT: run one step of optimization
            optimizer.run(session=sess, feed_dict=feed_dict)

            # update loss metrics every eval_every_n_batches steps
            if nb_batches_served % eval_every_n_batches == 0:
                # calculate test error
                train_err = model.eval(session=sess, feed_dict=feed_dict)
                precission_at_1 = test_precision_at_k(pred_opt,
                                                      feed_dict,
                                                      k=1,
                                                      sess=sess)
                precission_at_2 = test_precision_at_k(pred_opt,
                                                      feed_dict,
                                                      k=2,
                                                      sess=sess)

                # update prog bar
                exp.add_metric_row({
                    'tng loss': train_err,
                    'P@1': precission_at_1,
                    'P@2': precission_at_2
                })
            nb_batches_served += 1
            progbar.add(n=len(batch_context),
                        values=[('train_err', train_err),
                                ('P@1', precission_at_1),
                                ('P@2', precission_at_2)])

        # ----------------------
        # END OF EPOCH PROCESSING
        # ----------------------
        # calculate the val loss
        print('\nepoch complete...\n')
        check_val_stats(model, pred_opt, udc_dataset, hparams, context_ph,
                        utterance_ph, exp, sess, epoch)

        # save model
        save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)
def check_val_stats(model, pred_opt, data, hparams, X_ph, Y_ph, exp, sess, epoch):
    """
    Runs through validation data to check the overall mean loss and dump
    per-example predictions to ``result.txt`` for manual inspection.

    :param model: loss tensor evaluated for the validation error
    :param pred_opt: similarity op; row i scores question i against all answers
    :param data: dataset object exposing ``val_generator``
    :param hparams: hyperparameter namespace (uses ``batch_size``)
    :param X_ph: placeholder fed with the question/context batch
    :param Y_ph: placeholder fed with the answer/utterance batch
    :param exp: test-tube Experiment used to log metrics
    :param sess: active TF session
    :param epoch: zero-based epoch index (logged as ``epoch + 1``)
    :return: None
    """
    print('checking val loss...')
    max_val_batches = 100
    val_gen = data.val_generator(batch_size=hparams.batch_size, max_epochs=100)

    overall_err = []
    overall_p_1 = []
    overall_p_2 = []
    progbar = Progbar(target=max_val_batches, width=50)
    for batch_nb in range(max_val_batches):
        # FIX: break cleanly if the generator runs dry instead of letting
        # StopIteration escape
        try:
            batch_X, batch_Y = next(val_gen)
        except StopIteration:
            break
        if len(batch_X) == 0:
            continue

        # aggregate data
        feed_dict = {X_ph: batch_X, Y_ph: batch_Y}

        # dump per-question predictions for manual inspection
        sims = pred_opt.eval(session=sess, feed_dict=feed_dict)
        # FIX: use a context manager -- the original opened an append handle
        # per batch and never closed it (file descriptor leak, unflushed
        # writes on crash)
        with open("result.txt", "a") as result_file:
            for ban, paras in enumerate(zip(batch_X, batch_Y)):
                # index of the highest-scoring candidate answer for question `ban`
                pred_num = [
                    i[0]
                    for i in sorted(enumerate(sims[ban]), key=lambda x: x[1])
                ][::-1][0]
                result_file.write("Question \n")
                result_file.writelines(paras[0] + "\n")
                result_file.writelines("\n")
                result_file.write("Right Answer \n")
                result_file.writelines(paras[1] + "\n")
                result_file.writelines("\n")
                result_file.write("Predicted Answer \n")
                result_file.writelines(batch_Y[pred_num] + "\n")
                result_file.writelines("*************************************\n")

        # calculate metrics
        val_err = model.eval(session=sess, feed_dict=feed_dict)
        precission_at_1 = test_precision_at_k(pred_opt, feed_dict, k=1, sess=sess)
        precission_at_2 = test_precision_at_k(pred_opt, feed_dict, k=2, sess=sess)

        # track metrics for means
        overall_err.append(val_err)
        overall_p_1.append(precission_at_1)
        overall_p_2.append(precission_at_2)

        # update exp and progbar
        exp.add_metric_row({
            'val loss': val_err,
            'val P@1': precission_at_1,
            'val P@2': precission_at_2
        })
        progbar.add(n=1)

    # FIX: avoid NaN means (and RuntimeWarning) when no batch was served
    if not overall_err:
        print('\nno validation batches were served, skipping val stats')
        return

    # log and save val metrics
    overall_val_mean_err = np.asarray(overall_err).mean()
    overall_p_1_mean = np.asarray(overall_p_1).mean()
    overall_p_2_mean = np.asarray(overall_p_2).mean()
    exp.add_metric_row({
        'epoch_mean_err': overall_val_mean_err,
        'epoch_P@1_mean': overall_p_1_mean,
        'epoch_P@2_mean': overall_p_2_mean,
        'epoch': epoch + 1
    })
    print('\nval loss: ', overall_val_mean_err, 'epoch_P@1_mean: ',
          overall_p_1_mean, 'epoch_P@2_mean: ', overall_p_2_mean)
    print('-' * 100)
def train_main(hparams): """ Main training routine for the dot semantic network bot :return: """ # ----------------------- # INIT EXPERIMENT # ---------------------- exp = Experiment(name=hparams.exp_name, debug=hparams.debug, description=hparams.exp_desc, autosave=False, save_dir=hparams.test_tube_dir) exp.add_argparse_meta(hparams) exp.save() # ----------------------- # LOAD DATASET # ---------------------- udc_dataset = UDCDataset(vocab_path=hparams.vocab_path, train_path=hparams.dataset_train_path, test_path=hparams.dataset_test_path, val_path=hparams.dataset_val_path, max_seq_len=hparams.max_seq_len) # ----------------------- # INIT TF VARS # ---------------------- # context holds chat history # utterance holds our responses # labels holds the ground truth labels context_ph = tf.placeholder(dtype=tf.int32, shape=[hparams.batch_size, None], name='context_seq_in') utterance_ph = tf.placeholder(dtype=tf.int32, shape=[hparams.batch_size, None], name='utterance_seq_in') # ---------------------- # EMBEDDING LAYER # ---------------------- # you can preload your own or learn in the network # in this case we'll just learn it in the network embedding_layer = tf.Variable(tf.random_uniform( [udc_dataset.vocab_size, hparams.embedding_dim], -1.0, 1.0), name='embedding') # ---------------------- # RESOLVE EMBEDDINGS # ---------------------- # look up embeddings context_embedding = tf.nn.embedding_lookup(embedding_layer, context_ph) utterance_embedding = tf.nn.embedding_lookup(embedding_layer, utterance_ph) # avg all embeddings (sum works better?) 
# this generates 1 vector per training example context_embedding_summed = tf.reduce_mean(context_embedding, axis=1) utterance_embedding_summed = tf.reduce_mean(utterance_embedding, axis=1) # ---------------------- # OPTIMIZATION PROBLEM # ---------------------- model, _, _, pred_opt = dot_semantic_nn( context=context_embedding_summed, utterance=utterance_embedding_summed, tng_mode=hparams.train_mode) # allow optiizer to be changed through hyper params optimizer = get_optimizer(hparams=hparams, minimize=model) # ---------------------- # TF ADMIN (VAR INIT, SESS) # ---------------------- sess = tf.Session() init_vars = tf.global_variables_initializer() sess.run(init_vars) # Add ops to save and restore all the variables. saver = tf.train.Saver() # ---------------------- # TRAINING ROUTINE # ---------------------- # admin vars nb_batches_served = 0 eval_every_n_batches = hparams.eval_every_n_batches train_err = 1000 precission_at_1 = 0 precission_at_2 = 0 # iter for the needed epochs print('\n\n', '-' * 100, '\n {} TRAINING\n'.format(hparams.exp_name.upper()), '-' * 100, '\n\n') for epoch in range(hparams.nb_epochs): print('training epoch:', epoch + 1) progbar = Progbar(target=udc_dataset.nb_tng, width=50) train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size, max_epochs=1) # mini batches for batch_context, batch_utterance in train_gen: feed_dict = { context_ph: batch_context, utterance_ph: batch_utterance } # OPT: run one step of optimization optimizer.run(session=sess, feed_dict=feed_dict) # update loss metrics if nb_batches_served % eval_every_n_batches == 0: # calculate test error train_err = model.eval(session=sess, feed_dict=feed_dict) precission_at_1 = test_precision_at_k(pred_opt, feed_dict, k=1, sess=sess) precission_at_2 = test_precision_at_k(pred_opt, feed_dict, k=2, sess=sess) # update prog bar exp.add_metric_row({ 'tng loss': train_err, 'P@1': precission_at_1, 'P@2': precission_at_2 }) nb_batches_served += 1 
progbar.add(n=len(batch_context), values=[('train_err', train_err), ('P@1', precission_at_1), ('P@2', precission_at_2)]) # ---------------------- # END OF EPOCH PROCESSING # ---------------------- # calculate the val loss print('\nepoch complete...\n') check_val_stats(model, pred_opt, udc_dataset, hparams, context_ph, utterance_ph, exp, sess, epoch) # save model save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch) # save exp data exp.save()
def train_main(hparams):
    """Build and train the dot-product semantic matching model on UDC data.

    Sets up a test-tube Experiment for metric logging, loads the UDC
    dataset, builds the placeholder + embedding + mean-pooling graph,
    then runs the training loop with periodic train-metric logging,
    per-epoch validation (via ``check_val_stats``) and checkpointing
    (via ``save_model``).

    :param hparams: argparse-style hyperparameter namespace; must provide
        exp_name, debug, exp_desc, test_tube_dir, vocab_path,
        dataset_train_path, dataset_test_path, dataset_val_path,
        max_seq_len, batch_size, embedding_dim, train_mode,
        eval_every_n_batches and nb_epochs.
    :return: None
    """
    # ----------------------
    # EXPERIMENT TRACKING
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)
    exp.add_argparse_meta(hparams)
    exp.save()

    # ----------------------
    # DATA
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # ----------------------
    # GRAPH: INPUTS + EMBEDDINGS
    # ----------------------
    # int32 token-id sequences, variable sequence length per batch
    context_ph = tf.placeholder(dtype=tf.int32,
                                shape=[hparams.batch_size, None],
                                name='context_seq_in')
    utterance_ph = tf.placeholder(dtype=tf.int32,
                                  shape=[hparams.batch_size, None],
                                  name='utterance_seq_in')

    embedding_layer = tf.Variable(tf.random_uniform(
        [udc_dataset.vocab_size, hparams.embedding_dim], -1.0, 1.0),
        name='embedding')
    context_embedding = tf.nn.embedding_lookup(embedding_layer, context_ph)
    utterance_embedding = tf.nn.embedding_lookup(embedding_layer, utterance_ph)

    # mean over the sequence axis -> one vector per training example
    context_embedding_summed = tf.reduce_mean(context_embedding, axis=1)
    utterance_embedding_summed = tf.reduce_mean(utterance_embedding, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    model, _, _, pred_opt = dot_semantic_nn(
        context=context_embedding_summed,
        utterance=utterance_embedding_summed,
        tng_mode=hparams.train_mode)

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=model)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS) + TRAINING ROUTINE
    # ----------------------
    # fix: use a context manager so the session is closed when training
    # ends (the original leaked the session)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # ops to save and restore all the variables
        saver = tf.train.Saver()

        nb_batches_served = 0
        eval_every_n_batches = hparams.eval_every_n_batches
        # placeholder metric values shown in the progbar until the first
        # evaluation runs
        train_err = 1000
        precision_at_1 = 0
        precision_at_2 = 0

        print('\n\n', '-' * 100,
              '\n {} TRAINING\n'.format(hparams.exp_name.upper()),
              '-' * 100, '\n\n')
        for epoch in range(hparams.nb_epochs):
            print('training epoch:', epoch + 1)
            progbar = Progbar(target=udc_dataset.nb_tng, width=50)
            train_gen = udc_dataset.train_generator(
                batch_size=hparams.batch_size, max_epochs=1)

            # mini batches
            for batch_context, batch_utterance in train_gen:
                feed_dict = {
                    context_ph: batch_context,
                    utterance_ph: batch_utterance
                }

                # OPT: run one step of optimization
                optimizer.run(session=sess, feed_dict=feed_dict)

                # periodically refresh the logged training metrics
                if nb_batches_served % eval_every_n_batches == 0:
                    train_err = model.eval(session=sess, feed_dict=feed_dict)
                    precision_at_1 = test_precision_at_k(pred_opt, feed_dict,
                                                         k=1, sess=sess)
                    precision_at_2 = test_precision_at_k(pred_opt, feed_dict,
                                                         k=2, sess=sess)
                    exp.add_metric_row({
                        'tng loss': train_err,
                        'P@1': precision_at_1,
                        'P@2': precision_at_2
                    })

                nb_batches_served += 1
                progbar.add(n=len(batch_context),
                            values=[('train_err', train_err),
                                    ('P@1', precision_at_1),
                                    ('P@2', precision_at_2)])

            # ----------------------
            # END OF EPOCH PROCESSING
            # ----------------------
            print('\nepoch complete...\n')
            check_val_stats(model, pred_opt, udc_dataset, hparams,
                            context_ph, utterance_ph, exp, sess, epoch)
            save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)
            exp.save()
def transfer_model(source_df, target_df, test_df, method_flag, fold_num):
    """Train a source-domain network, transfer it to the target domain, and
    predict on the test set.

    First column of each dataframe is split off as the label column; the
    remainder is treated as the feature matrix.  Depending on
    ``method_flag`` the target labels are encoded as one-hot vectors,
    relation labels, or relation vectors before fine-tuning.

    :param source_df: source-domain data; column 0 = label, rest = features
    :param target_df: target-domain data; same layout
    :param test_df: target-domain test data; same layout
    :param method_flag: one of the mc._* method constants selecting the
        transfer strategy (_SCRATCH, _CONV_TRANSFER, _COUNT_ATDL, or a
        relation-vector method)
    :param fold_num: cross-validation fold index, forwarded to rv.* helpers
    :return: (predicted indices transposed, test labels transposed); for
        _COUNT_ATDL the test labels have been remapped in place via
        ``relations``
    """
    # column 0 is the label, remaining columns are features
    source_labels, source_data = np.split(np.array(source_df),[1],axis=1)
    target_labels, target_data = np.split(np.array(target_df),[1],axis=1)
    test_labels, test_data = np.split(np.array(test_df),[1],axis=1)

    # normalization (currently disabled: data is passed through unscaled)
    #normalized_source_data = pre.normalize(source_data)
    #normalized_target_data = pre.normalize(target_data)
    #normalized_test_data = pre.normalize(test_data)
    normalized_source_data = source_data
    normalized_target_data = target_data
    normalized_test_data = test_data

    ### constuct model for source domain task ###
    # optimization
    opt = Adam()

    # network setting: shared latent trunk + per-domain output layers
    latent = models.latent(normalized_source_data.shape[1])
    sll = models.source_last_layer()
    tll = models.target_last_layer()

    source_inputs = Input(shape=normalized_source_data.shape[1:])
    latent_features = latent(source_inputs)
    source_predictors = sll(latent_features)
    # NOTE(review): constant name "_SORUCE_..." looks like a typo for
    # "_SOURCE_..." — defined in mc, so left untouched here
    latent.trainable = mc._SORUCE_LATENT_TRAIN
    source_predictors.trainable = True
    source_nn = Model(inputs=[source_inputs], outputs=[source_predictors])
    source_nn.compile(loss=['mean_squared_error'],optimizer=opt)
    #source_nn.summary()

    # training using source domain data (skipped when training from scratch)
    if method_flag != mc._SCRATCH:
        source_max_loop = int(normalized_source_data.shape[0]/mc._BATCH_SIZE)
        source_progbar = Progbar(target=mc._SOURCE_EPOCH_NUM)
        for epoch in range(mc._SOURCE_EPOCH_NUM):
            # reshuffle pairs each epoch; labels are one-hot encoded per batch
            shuffle_data, shuffle_labels, _ = pre.paired_shuffle(normalized_source_data,source_labels,1)
            for loop in range(source_max_loop):
                batch_train_data = shuffle_data[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
                batch_train_labels = shuffle_labels[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
                batch_train_labels = np.reshape(batch_train_labels, [len(batch_train_labels)])
                one_hots = np.identity(mc._SOURCE_DIM_NUM)[np.array(batch_train_labels, dtype=np.int32)]
                loss = source_nn.train_on_batch([batch_train_data],[one_hots])
            #source_progbar.add(1, values=[("source loss",loss)])

        # save
        #latent.save('../results/source_latent.h5')
        #sll.save('../results/source_last_layer.h5')

    # compute relation vectors: how target labels map onto the source
    # output space depends on the chosen method
    if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
        # plain one-hot encoding in the target label space
        target_vectors = np.identity(mc._TARGET_DIM_NUM)[np.array(target_labels, dtype=np.int32)]
        target_vectors = np.reshape(target_vectors, [target_vectors.shape[0], target_vectors.shape[2]])
    elif method_flag == mc._COUNT_ATDL:
        # remap target labels to source labels, then one-hot in source space;
        # `relations` is reused below to translate the test labels
        target_labels, relations = rv.compute_relation_labels(source_nn, normalized_target_data, target_labels, fold_num)
        target_vectors = np.identity(mc._SOURCE_DIM_NUM)[np.array(target_labels, dtype=np.int32)]
        target_vectors = np.reshape(target_vectors, [target_vectors.shape[0], target_vectors.shape[2]])
    else:
        # soft relation vectors in source space, one row per target label;
        # `relation_vectors` is reused below for nearest-neighbour decoding
        relation_vectors = rv.compute_relation_vectors(source_nn, normalized_target_data, target_labels, fold_num, method_flag)
        target_vectors = np.zeros((len(target_labels),mc._SOURCE_DIM_NUM), dtype=np.float32)
        for i in range(len(target_labels)):
            target_vectors[i] = relation_vectors[int(target_labels[i])]

    ### tuning model for target domain task ###
    latent.trainable = mc._TARGET_LATENT_TRAIN
    target_inputs = Input(shape=normalized_target_data.shape[1:])
    latent_features = latent(target_inputs)
    # scratch/conv-transfer predict in the target label space; all other
    # methods keep the source output layer and its dimensionality
    if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
        predictors = tll(latent_features)
        label_num = mc._TARGET_DIM_NUM
    else:
        predictors= sll(latent_features)
        label_num = mc._SOURCE_DIM_NUM
    target_nn = Model(inputs=[target_inputs], outputs=[predictors])
    target_nn.compile(loss=['mean_squared_error'],optimizer=opt)
    #target_nn.summary()

    # training using target domain data
    target_max_loop = int(normalized_target_data.shape[0]/mc._BATCH_SIZE)
    target_progbar = Progbar(target=mc._TARGET_EPOCH_NUM)
    for epoch in range(mc._TARGET_EPOCH_NUM):
        shuffle_data, shuffle_labels, _ = \
            pre.paired_shuffle(normalized_target_data, target_vectors, label_num)
        for loop in range(target_max_loop):
            batch_train_data = shuffle_data[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
            batch_train_labels = shuffle_labels[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
            loss = target_nn.train_on_batch([batch_train_data],[batch_train_labels])
        #target_progbar.add(1, values=[("target loss",loss)])

    # compute outputs of test data of target domain
    x = target_nn.predict([normalized_test_data])
    if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
        idx = np.argmax(x, axis=1)
    elif method_flag == mc._COUNT_ATDL:
        idx = np.argmax(x,axis=1)
        # translate test labels into the source label space using the
        # relations learned above (mutates test_labels in place)
        for j in range(len(test_labels)):
            for i in range(mc._TARGET_DIM_NUM):
                if test_labels[j] == i:
                    test_labels[j] = relations[i]
                    break
    else:
        # nearest relation vector decides the predicted label;
        # presumably `Neighbors` wraps a k-NN search — confirm its contract
        distance, idx = Neighbors(x, relation_vectors, 1)
        idx = idx[:,0]

    # free Keras/TF graph state so repeated calls don't accumulate memory
    backend.clear_session()
    return idx.T, test_labels.T