def testset_evaluation(self, D_test, D_C_test, prod_to_sent, flic, num_samples=20, random_sampling=False):
    # Accumulators for the mean probabilities of positive/negative links and the cost.
    probs_pos = 0
    probs_neg = 0
    probs_pos_count = 0
    probs_neg_count = 0
    tot_cost = 0
    sampled_sentences = []
    costs = []
    num_errors = 0.0
    for context, x, y in training_sample(D_test, D_C_test):
        mean_prob, mean_cost = self.validate_t(flic[context], flic[x], y)
        costs.append(mean_cost)
        tot_cost += mean_cost
        if y == 1:
            probs_pos += mean_prob
            probs_pos_count += 1
        else:
            probs_neg += mean_prob
            probs_neg_count += 1
        # A prediction counts as an error if it lies on the wrong side of 0.5.
        if abs(mean_prob - y) > 0.5:
            num_errors += 1
    # Returns the sampled sentences, the accuracy and (mean cost, mean positive score, mean negative score).
    return sampled_sentences, 1 - num_errors / (probs_pos_count + probs_neg_count), \
        (tot_cost / (probs_pos_count + probs_neg_count),
         probs_pos / probs_pos_count,
         probs_neg / probs_neg_count)
def validation_cost(self, D_valid, D_C_valid, prod_to_sent, flic, num_samples=10):
    probs_pos = 0
    probs_neg = 0
    probs_pos_count = 0
    probs_neg_count = 0
    tot_cost = 0
    num_errors = 0.0
    for context, x, y in training_sample(D_valid, D_C_valid):
        mean_prob, mean_cost = self.validate_t(flic[context], flic[x], y)
        if abs(mean_prob - y) > 0.5:
            num_errors += 1
        tot_cost += mean_cost
        if y == 1:
            probs_pos += mean_prob
            probs_pos_count += 1
        else:
            probs_neg += mean_prob
            probs_neg_count += 1
    count = probs_pos_count + probs_neg_count
    if probs_pos_count > 0 and probs_neg_count > 0:
        return 1 - num_errors / count, tot_cost / count, probs_pos / probs_pos_count, probs_neg / probs_neg_count
    else:
        return 0, 0, 0, 0
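# Illustrative sketch (an assumption, not part of the original code): the evaluation and
# training loops in this file iterate over `training_sample(D, D_C)`, which is assumed to
# yield (context, product, label) triples, with label 1 for linked pairs drawn from D and
# label 0 for negative pairs drawn from D_C. A minimal generator with that behaviour could
# look as follows; the shuffling and the pair layout of D and D_C are assumptions.
import random


def training_sample_sketch(D, D_C):
    """Yield (context, product, label) triples from positive and negative link sets."""
    labeled = [(context, x, 1) for context, x in D] + [(context, x, 0) for context, x in D_C]
    random.shuffle(labeled)
    for context, x, y in labeled:
        yield context, x, y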
def testset_evaluation(self, D_test, D_C_test, prod_to_sent, flic, num_samples=20, random_sampling=False):
    # Disable dropout for evaluation.
    self.dropout_encoder.set_value(0.0)
    self.dropout_generator.set_value(0.0)
    probs_pos = 0
    probs_neg = 0
    probs_pos_count = 0
    probs_neg_count = 0
    tot_cost = 0
    sampled_sentences = []
    costs = []
    num_errors = 0.0
    for context, x, y in training_sample(D_test, D_C_test):
        sent_embs = prod_to_sent[x][0]
        samples, max_sents_count = self.generator.sample(sent_embs.shape[0], num_samples)
        mean_prob, mean_cost = self.validate_t(sent_embs, samples, max_sents_count, flic[context], y)
        sampled_sentences.append((context, x, get_strings_from_samples(prod_to_sent[x][1], samples)))
        costs.append(mean_cost)
        tot_cost += mean_cost
        if y == 1:
            probs_pos += mean_prob
            probs_pos_count += 1
        else:
            probs_neg += mean_prob
            probs_neg_count += 1
        if abs(mean_prob - y) > 0.5:
            num_errors += 1
    # Restore the training dropout rates.
    self.dropout_generator.set_value(self.args.dropout_generator)
    self.dropout_encoder.set_value(self.args.dropout_encoder)
    return sampled_sentences, 1 - num_errors / (probs_pos_count + probs_neg_count), \
        (tot_cost / (probs_pos_count + probs_neg_count),
         probs_pos / probs_pos_count,
         probs_neg / probs_neg_count)
def validation_cost(self, D_valid, D_C_valid, prod_to_sent, flic, num_samples=10):
    probs_pos = 0
    probs_neg = 0
    probs_pos_count = 0
    probs_neg_count = 0
    tot_cost = 0
    num_errors = 0.0
    self.dropout_encoder.set_value(0.0)
    self.dropout_generator.set_value(0.0)
    for context, x, y in training_sample(D_valid, D_C_valid):
        sent_embs = prod_to_sent[x][0]
        samples, max_sents_count = self.generator.sample(sent_embs.shape[0], num_samples)
        mean_prob, mean_cost = self.validate_t(sent_embs, samples, max_sents_count, flic[context], y)
        if abs(mean_prob - y) > 0.5:
            num_errors += 1
        tot_cost += mean_cost
        if y == 1:
            probs_pos += mean_prob
            probs_pos_count += 1
        else:
            probs_neg += mean_prob
            probs_neg_count += 1
    self.dropout_generator.set_value(self.args.dropout_generator)
    self.dropout_encoder.set_value(self.args.dropout_encoder)
    count = probs_pos_count + probs_neg_count
    if probs_pos_count > 0 and probs_neg_count > 0:
        return 1 - num_errors / count, tot_cost / count, probs_pos / probs_pos_count, probs_neg / probs_neg_count
    else:
        return 0, 0, 0, 0
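# Design note (sketch only, not part of the original code): the evaluation methods above set
# the dropout shared variables to 0.0 before scoring and restore the configured rates
# afterwards. Assuming `dropout_encoder` and `dropout_generator` behave like Theano shared
# variables with get_value/set_value, this toggling could be wrapped in a context manager so
# an exception cannot leave dropout disabled; `dropout_disabled` is a hypothetical helper.
from contextlib import contextmanager


@contextmanager
def dropout_disabled(model):
    """Temporarily set the model's dropout rates to 0.0 and restore them on exit."""
    old_encoder = model.dropout_encoder.get_value()
    old_generator = model.dropout_generator.get_value()
    model.dropout_encoder.set_value(0.0)
    model.dropout_generator.set_value(0.0)
    try:
        yield
    finally:
        model.dropout_encoder.set_value(old_encoder)
        model.dropout_generator.set_value(old_generator)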
def train(self, D, D_C, D_valid, D_C_valid, flic, prod_to_sent):
    # ---------------------------------------------------------------------------------------------------------
    # Looping through the data (i.e. the 'Training-For-Loop').
    # ---------------------------------------------------------------------------------------------------------
    # For time measurements
    t_training = 0.0
    t_0 = 0.0
    print("Start training.")
    print("Num links in training set = {}".format(len(D) + len(D_C)))
    for epoch in range(self.args.max_epochs):
        epoch += self.epochs_done
        iteration = 0
        cost_epoch = 0
        cost_tmp = 0
        probs_pos = 0
        probs_neg = 0
        probs_pos_count = 0
        probs_neg_count = 0
        probs_pos_tmp = 0
        probs_neg_tmp = 0
        probs_pos_count_tmp = 0
        probs_neg_count_tmp = 0
        for context, x, y in training_sample(D, D_C):
            # -----------------------------------------------------------------------------------------------
            # Train model
            # -----------------------------------------------------------------------------------------------
            t_0 = timer()
            mean_prob, mean_cost = self.train_model_t(flic[context], flic[x], y)
            t_training += (timer() - t_0)
            # -----------------------------------------------------------------------------------------------
            # Output of the training and validation scores (only sometimes)
            # -----------------------------------------------------------------------------------------------
            cost_epoch += mean_cost
            cost_tmp += mean_cost
            if y == 1:
                probs_pos += mean_prob
                probs_pos_tmp += mean_prob
                probs_pos_count += 1
                probs_pos_count_tmp += 1
            else:
                probs_neg += mean_prob
                probs_neg_tmp += mean_prob
                probs_neg_count += 1
                probs_neg_count_tmp += 1
            iteration += 1
            # Every 1000 data points
            if iteration % 1000 == 0:
                print("Cost on the training set after {} iterations: {}".format(
                    iteration, cost_tmp / (probs_pos_count_tmp + probs_neg_count_tmp)))
            if iteration % 10000 == 0 and self.args.save_model:
                self.store(self.args.save_model, epoch, iteration)
                print("Model saved. Epoch = {}, Iteration = {}".format(epoch, iteration))
            if self.sig_handler.exit:
                if self.best_val_error == sys.float_info.max and self.args.save_model:
                    self.best_val_error = -1
                    self.store(self.args.save_model + '/best_valerr_' +
                               str("{0:3f}.pkl".format(self.best_val_error)))
                return (self.best_val_error, "SIGNAL")
        # ---------------------------------------------------------------------------------------------------
        # Output per epoch
        # ---------------------------------------------------------------------------------------------------
        if self.args.save_model:
            self.store(self.args.save_model, epoch, 0)
            print("\nEpoch {} done. Model saved!".format(epoch))
        else:
            print("\nEpoch {} done.".format(epoch))
        # Timing output
        if self.args.measure_timing:
            print("Time Training = {}".format(t_training / iteration))
        print("Calculating the average cost in this epoch.")
        if D_valid is not None and D_C_valid is not None:
            accuracy, valid_cost, valid_pos_score, valid_neg_score = self.validation_cost(
                D_valid, D_C_valid, prod_to_sent, flic)
            self._print_scores(
                (valid_cost, valid_pos_score, valid_neg_score),
                (cost_epoch / (probs_pos_count + probs_neg_count),
                 probs_pos / probs_pos_count,
                 probs_neg / probs_neg_count))
            if valid_cost < self.best_val_error and self.args.save_model:
                self.best_val_error = min(self.best_val_error, valid_cost)
                self.store(self.args.save_model + '/best_valerr_' +
                           str("{0:3f}.pkl".format(valid_cost)))
            # Decreasing the learning rate and reloading the best model if the validation cost
            # is not decreasing anymore.
            if self.args.adaptive_lrs:
                lrs_adapted = self.adaptive_learning_rate.adapt_learning_rate(valid_cost, self.lr)
                if self.args.save_model and self.args.adaptive_lrs and lrs_adapted:
                    lr_value = self.lr.get_value()
                    self.load(self.args.save_model + '/best_valerr_' +
                              str("{0:3f}.pkl".format(self.best_val_error)))
                    self.lr.set_value(lr_value)
                    print("Best model loaded.")
                # If the learning rates are very small, we stop our training.
                if self.lr.get_value() < 1e-5:
                    return (self.best_val_error, "END")
        else:
            self._print_scores(
                None,
                (cost_epoch / (probs_pos_count + probs_neg_count),
                 probs_pos / probs_pos_count,
                 probs_neg / probs_neg_count))
        print("")
        # Resets all temporary variables.
        cost_epoch = 0
        if epoch + 1 >= self.args.max_epochs:
            return (self.best_val_error, "END")
        if (epoch + 1) % 10 == 0:
            self.epochs_done += 10
            self.store(self.args.save_model + '/after_10_epochs_' +
                       str("{0:3f}.pkl".format(valid_cost)))
            return (valid_cost, "10_epochs")
        probs_neg = 0
        probs_pos = 0
        probs_pos_count = 0
        probs_neg_count = 0
    return (self.best_val_error, "END")
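# Illustrative sketch (an assumption, not the original class): train() above expects
# `adaptive_learning_rate.adapt_learning_rate(valid_cost, lr)` to shrink the learning rate
# once the validation cost stops improving and to return True when it did so, so that the
# caller can reload the best model. The patience window and decay factor below are
# assumptions; `lr` is assumed to expose get_value/set_value like a Theano shared variable.
import numpy as np


class AdaptiveLearningRateSketch(object):
    def __init__(self, patience=3, decay=0.5):
        self.patience = patience
        self.decay = decay
        self.best_cost = float('inf')
        self.bad_epochs = 0

    def adapt_learning_rate(self, valid_cost, lr):
        """Return True if the learning rate was decreased in this call."""
        if valid_cost < self.best_cost:
            self.best_cost = valid_cost
            self.bad_epochs = 0
            return False
        self.bad_epochs += 1
        if self.bad_epochs >= self.patience:
            old = lr.get_value()
            lr.set_value(np.asarray(old * self.decay, dtype=old.dtype))
            self.bad_epochs = 0
            return True
        return False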
def testset_evaluation(self, D_test, D_C_test, prod_to_sent, flic, num_samples=20, return_all_probs=False):
    self.dropout_encoder.set_value(0.0)
    self.dropout_generator.set_value(0.0)
    probs_pos = 0
    probs_neg = 0
    probs_pos_count = 0
    probs_neg_count = 0
    tot_cost = 0
    sampled_sentences = []
    costs = []
    num_errors = 0.0
    for context, x, y in training_sample(D_test, D_C_test):
        sent_embs = prod_to_sent[x][0]
        if not self.args.sample_all_sentences:
            # Sampling
            L = self.get_L_t(sent_embs, flic[context])
            samples, max_sents_count = self.generator.sample(L, num_samples)
            mean_prob, mean_cost = self.validate_t(sent_embs, samples, max_sents_count, flic[context], y)
            # return_all_probs is only needed for the script which prints out sampled sentences.
            if return_all_probs:
                sampled_sentences.append((context, x, y, mean_prob,
                                          get_strings_from_samples(prod_to_sent[x][1], samples)))
            else:
                sampled_sentences.append((context, x,
                                          get_strings_from_samples(prod_to_sent[x][1], samples)))
            costs.append(mean_cost)
        else:
            mean_prob, mean_cost = self.validate_t(sent_embs, flic[context], y)
        tot_cost += mean_cost
        if y == 1:
            probs_pos += mean_prob
            probs_pos_count += 1
        else:
            probs_neg += mean_prob
            probs_neg_count += 1
        if abs(mean_prob - y) > 0.5:
            num_errors += 1
    self.dropout_generator.set_value(self.args.dropout_generator)
    self.dropout_encoder.set_value(self.args.dropout_encoder)
    return sampled_sentences, 1 - num_errors / (probs_pos_count + probs_neg_count), \
        (tot_cost / (probs_pos_count + probs_neg_count),
         probs_pos / probs_pos_count,
         probs_neg / probs_neg_count)
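# Illustrative sketch (an assumption, not the original helper): `get_strings_from_samples`
# above receives prod_to_sent[x][1] (assumed to be the raw sentences of product x) together
# with the index sets sampled by the generator, and is expected to map each sampled index
# back to its sentence string so the selected sentences can be inspected. Treating `samples`
# as an iterable of index sequences, a minimal version might be:
def get_strings_from_samples_sketch(sentences, samples):
    """Map each sampled set of sentence indices to the corresponding sentence strings."""
    return [[sentences[idx] for idx in sample] for sample in samples]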
def validation_cost(self, D_valid, D_C_valid, prod_to_sent, flic, num_samples=10):
    """
    Evaluates the model on the validation set.

    :param D_valid: Validation set (dataset D)
    :param D_C_valid: Validation set (dataset D_C)
    :type flic: FLIC
    :param flic: The FLIC model trained on the product data set
    :type prod_to_sent: dict: string -> set of sentences
    :param prod_to_sent: A dict from product IDs to a set of (sentence, sent_embedding) pairs.
    :type num_samples: int
    :param num_samples: The number of reinforcement-sampled sets.
    :return: (accuracy, mean_cost, mean_pos_probs, mean_neg_probs)
    """
    probs_pos = 0
    probs_neg = 0
    probs_pos_count = 0
    probs_neg_count = 0
    tot_cost = 0
    num_errors = 0.0
    self.dropout_encoder.set_value(0.0)
    self.dropout_generator.set_value(0.0)
    for context, x, y in training_sample(D_valid, D_C_valid):
        sent_embs = prod_to_sent[x][0]
        if not self.args.sample_all_sentences:
            L = self.get_L_t(sent_embs, flic[context])
            samples, max_sents_count = self.generator.sample(L, num_samples)
            mean_prob, mean_cost = self.validate_t(sent_embs, samples, max_sents_count, flic[context], y)
        else:
            mean_prob, mean_cost = self.validate_t(sent_embs, flic[context], y)
        if abs(mean_prob - y) > 0.5:
            num_errors += 1
        tot_cost += mean_cost
        if y == 1:
            probs_pos += mean_prob
            probs_pos_count += 1
        else:
            probs_neg += mean_prob
            probs_neg_count += 1
    self.dropout_generator.set_value(self.args.dropout_generator)
    self.dropout_encoder.set_value(self.args.dropout_encoder)
    count = probs_pos_count + probs_neg_count
    if probs_pos_count > 0 and probs_neg_count > 0:
        return 1 - num_errors / count, tot_cost / count, probs_pos / probs_pos_count, probs_neg / probs_neg_count
    else:
        return 0, 0, 0, 0