def run_epoch(self, epoch_name: str, epoch_num, data, is_training: bool):
    """Run one pass over ``data`` and return graph-weighted mean losses.

    Every minibatch is fed ``is_generative=False`` plus freshly sampled
    ``z_prior`` / ``z_prior_in`` values from ``utils.generate_std_normal``.
    In training mode the ``train_step`` op is fetched too and the configured
    output-layer dropout keep probability is used; otherwise it is 1.0.

    Args:
        epoch_name: Label shown in the progress line.
        epoch_num: Not used by this method.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: Whether to run the training op.

    Returns:
        ``((loss, edge_loss, kl_loss, node_symbol_loss), instances_per_sec)``
        where each loss is averaged over the number of processed graphs.
    """
    ph = self.placeholders
    progress_fmt = ("Running %s, batch %i (has %i graphs). Loss so far: %.4f. "
                    "Edge loss: %.4f, KL loss: %.4f, Node symbol loss: %.4f")
    # Running sums, in order: loss, edge loss, KL loss, node symbol loss.
    totals = [0, 0, 0, 0]
    start_time = time.time()
    processed_graphs = 0
    batch_iterator = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    for step, feed in enumerate(batch_iterator):
        num_graphs = feed[ph['num_graphs']]
        processed_graphs += num_graphs
        feed[ph['is_generative']] = False
        # Randomly sample from normal distribution
        feed[ph['z_prior']] = utils.generate_std_normal(
            self.params['batch_size'], feed[ph['num_vertices']],
            self.params['encoding_size'])
        feed[ph['z_prior_in']] = utils.generate_std_normal(
            self.params['batch_size'], feed[ph['num_vertices']],
            self.params['hidden_size'])

        fetches = [
            self.ops['loss'],
            self.ops['mean_edge_loss_in'],
            self.ops['mean_kl_loss_in'],
            self.ops['mean_node_symbol_loss_in'],
        ]
        if is_training:
            feed[ph['out_layer_dropout_keep_prob']] = \
                self.params['out_layer_dropout_keep_prob']
            fetches.append(self.ops['train_step'])
        else:
            feed[ph['out_layer_dropout_keep_prob']] = 1.0

        result = self.sess.run(fetches, feed_dict=feed)
        for idx in range(len(totals)):
            totals[idx] += result[idx] * num_graphs

        running_means = tuple(t / processed_graphs for t in totals)
        print(progress_fmt % ((epoch_name, step, num_graphs) + running_means),
              end='\r')

    averages = tuple(t / processed_graphs for t in totals)
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return averages, instance_per_sec
def pred(self, testfile):
    """Predict on ``testfile`` and pick, per example, the lowest-scoring planner.

    Original author note: output the planner with the lowest probability of
    timing out, and get the percentage of timeouts when using these planners.

    The best saved model is restored first when ``self.best_model_file``
    exists. Predictions for all minibatches are concatenated (once), the
    argmin along axis 1 selects one column per example, and the matching
    entry of that example's ``"labels"`` list is collected.

    Args:
        testfile: Passed to ``self.load_data`` with ``is_training_data=False``.

    Returns:
        ``(predictions, sum_of_selected_labels, number_of_examples)``.
    """
    if os.path.exists(self.best_model_file):
        self.restore_model(self.best_model_file)
    testdata = self.load_data(testfile, is_training_data=False)

    accuracy_ops = [self.ops['accuracy_task%i' % task_id]
                    for task_id in self.params['task_ids']]
    # The accuracy ops are still fetched so the session call is unchanged;
    # their values are not needed for the returned result.
    fetch_list = [self.ops['predicted_values'], accuracy_ops]
    batch_iterator = ThreadedIterator(
        self.make_minibatch_iterator(testdata, False), max_queue_size=5)

    preds = []
    for batch_data in batch_iterator:
        batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
        result = self.sess.run(fetch_list, feed_dict=batch_data)
        batch_pred = np.array(result[0])
        if batch_pred.ndim == 1:
            batch_pred = np.expand_dims(batch_pred, 1)
        preds.append(batch_pred.T)

    # Concatenate once and reuse for both the argmin and the return value.
    all_preds = np.concatenate(preds, 0)
    planner = np.argmin(all_preds, axis=1)
    pred_labels = [example["labels"][planner[ex_id]]
                   for ex_id, example in enumerate(testdata)]
    return all_preds, sum(pred_labels), len(testdata)
def generate_vector(self,
                    data_dir='/data/bwj/test/mangrove/ml/ggnn/data/singles/rw/',
                    output_dir='./outputs/ggnn_vectors/'):
    """Save a mean final-node representation for every file under ``data_dir``.

    Does nothing unless ``self.args['--restore']`` is set. For each file found
    by ``os.walk`` the file is loaded with ``self.load_data``, run through the
    network batch by batch, and the mean over axis 0 of the final node
    representations is written to ``<output_dir>/<name>_final_node.npy``.
    Each batch overwrites the file, so the last batch of a file wins.

    Changes from the previous version:
      * The minibatch iterator is built from the file just loaded; before it
        iterated ``self.valid_data``, so every output held validation-set
        vectors and ``single_data`` was never used.
      * The representation op is built once instead of once per batch
        (presumably each call adds ops to the graph -- confirm).
      * ``output_dir`` is created if missing.

    Args:
        data_dir: Directory to scan; also stored on ``self.myalldata_dir`` and
            ``self.data_dir``. Defaults to the previously hard-coded path.
        output_dir: Directory receiving the ``.npy`` files.
    """
    with self.graph.as_default():
        if self.args.get('--restore') is None:
            return

        self.myalldata_dir = data_dir
        self.data_dir = self.myalldata_dir
        os.makedirs(output_dir, exist_ok=True)

        final_node_op = self.compute_final_node_representations()
        keep_prob = self.placeholders['out_layer_dropout_keep_prob']

        for _, _, files in os.walk(self.myalldata_dir):
            for singlefile in files:
                single_data = self.load_data(singlefile,
                                             is_training_data=False)
                batch_iterator = ThreadedIterator(
                    self.make_minibatch_iterator(single_data, False),
                    max_queue_size=5)
                out_path = os.path.join(
                    output_dir,
                    '{}_final_node.npy'.format(
                        singlefile.replace('.json', '')))
                for batch_data in batch_iterator:
                    batch_data[keep_prob] = 1.0
                    final_node = self.sess.run(final_node_op,
                                               feed_dict=batch_data)
                    np.save(out_path, np.mean(final_node, 0))
                print("{} ok".format(singlefile))
def run_epoch(self, epoch_name: str, data, is_training: bool):
    """Run one pass over ``data`` and report loss, accuracies and throughput.

    Args:
        epoch_name: Label shown in the progress line.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: When true, ``train_step`` is fetched and the configured
            dropout keep probability is fed; otherwise 1.0 is fed.

    Returns:
        ``(loss, accuracies, error_ratios, instances_per_sec)``. In this
        variant ``error_ratios`` is the same object as ``accuracies``.
    """
    # NOTE(review): the batch loss is added unweighted but later divided by
    # the graph count; other variants weight by num_graphs -- confirm whether
    # ops['loss'] is already a per-batch sum.
    keep_prob_ph = self.placeholders['out_layer_dropout_keep_prob']
    task_accuracy_ops = [self.ops['accuracy_task%i' % task_id]
                         for task_id in self.params['task_ids']]
    total_loss = 0
    weighted_accuracies = []
    start_time = time.time()
    processed_graphs = 0
    batches = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    for step, feed in enumerate(batches):
        num_graphs = feed[self.placeholders['num_graphs']]
        processed_graphs += num_graphs

        fetches = [self.ops['loss'], task_accuracy_ops]
        if is_training:
            feed[keep_prob_ph] = self.params['out_layer_dropout_keep_prob']
            fetches.append(self.ops['train_step'])
        else:
            feed[keep_prob_ph] = 1.0

        result = self.sess.run(fetches, feed_dict=feed)
        total_loss += result[0]
        weighted_accuracies.append(np.array(result[1]) * num_graphs)
        print("Running %s, batch %i (has %i graphs). Loss so far: %.4f"
              % (epoch_name, step, num_graphs,
                 total_loss / processed_graphs),
              end='\r')

    accuracies = np.sum(weighted_accuracies, axis=0) / processed_graphs
    mean_loss = total_loss / processed_graphs
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return mean_loss, accuracies, accuracies, instance_per_sec
def pred(self, testfile, contextfile):
    """Predict on ``testfile`` and select the lowest-scoring column per example.

    Restores ``self.best_model_file`` first when it exists, then runs
    ``predicted_values`` over all minibatches with dropout disabled.

    Args:
        testfile: Passed to ``self.load_data`` with ``is_training_data=False``.
        contextfile: Passed to ``self.load_data`` as ``context_file``.

    Returns:
        ``(sum_of_selected_labels, number_of_examples, predictions,
        skipped_graphs)`` where the selected label of an example is
        ``example["labels"][argmin(predictions[example])]``.
    """
    if os.path.exists(self.best_model_file):
        self.restore_model(self.best_model_file)
    testdata, skipped_graphs = self.load_data(
        testfile, is_training_data=False, context_file=contextfile)

    processed_graphs = 0
    batches = ThreadedIterator(
        self.make_minibatch_iterator(testdata, False), max_queue_size=5)
    collected = []
    for feed in batches:
        processed_graphs += feed[self.placeholders['num_graphs']]
        feed[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
        outputs = self.sess.run([self.ops['predicted_values']],
                                feed_dict=feed)
        batch_pred = np.array(outputs[0])
        if batch_pred.ndim == 1:
            batch_pred = np.expand_dims(batch_pred, 1)
        collected.append(batch_pred.T)

    preds = np.concatenate(collected, 0)
    pred_id = np.argmin(preds, axis=1)
    pred_labels = []
    for ex_id in range(len(testdata)):
        pred_labels.append(testdata[ex_id]["labels"][pred_id[ex_id]])
    return sum(pred_labels), len(testdata), preds, skipped_graphs
def run_epoch(self, epoch_name: str, data):
    """Run one training pass over ``data``.

    Each minibatch fetches the loss, the ``train_step`` op and the
    ``state_Dec`` placeholder; only the loss is used here.

    Args:
        epoch_name: Label printed with every batch.
        data: Forwarded to ``self.make_minibatch_iterator`` (training mode).

    Returns:
        ``(mean_loss, instances_per_sec)`` with the loss weighted by the
        number of graphs per batch.
    """
    total_loss = 0
    start_time = time.time()
    processed_graphs = 0
    batches = ThreadedIterator(
        self.make_minibatch_iterator(data, True), max_queue_size=5)

    for step, feed in enumerate(batches):
        num_graphs = feed[self.placeholders['num_graphs']]
        processed_graphs += num_graphs
        fetches = [
            self.ops['loss'],
            self.ops['train_step'],
            self.placeholders['state_Dec'],
        ]
        batch_loss = self.sess.run(fetches, feed_dict=feed)[0]
        total_loss += batch_loss * num_graphs
        print("Running epoch: %s, batch %s, num_graphs: %s, loss so far: %s"
              % (epoch_name, step, num_graphs,
                 total_loss / processed_graphs))

    mean_loss = total_loss / processed_graphs
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return mean_loss, instance_per_sec
def run_epoch(self,
              epoch_name: str,
              data,
              is_training: bool,
              start_step: int = 0):
    """Run one pass over ``data``, logging a summary for every batch.

    Args:
        epoch_name: Label shown in the progress line.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: When true, ``train_step`` is fetched, the configured
            dropout keep probability is fed and summaries go to
            ``self.train_writer``; otherwise keep probability is 1.0 and
            summaries go to ``self.valid_writer``.
        start_step: Offset added to the batch index for the summary step.

    Returns:
        ``(loss, accuracies, error_ratios, instances_per_sec, steps)`` where
        ``error_ratios`` is ``accuracies`` divided by the per-task reference
        values selected by ``self.params["task_ids"]``.
    """
    # Per-task reference values used to normalise the accuracies.
    chemical_accuracies = np.array([
        0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
        0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
        0.004527465, 0.012292586, 0.037467458
    ])
    keep_prob_ph = self.placeholders['out_layer_dropout_keep_prob']
    task_accuracy_ops = [self.ops['accuracy_task%i' % task_id]
                         for task_id in self.params['task_ids']]

    total_loss = 0
    weighted_accuracies = []
    start_time = time.time()
    processed_graphs = 0
    steps = 0
    batches = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    for step, feed in enumerate(batches):
        num_graphs = feed[self.placeholders['num_graphs']]
        processed_graphs += num_graphs

        fetches = [self.ops['loss'], task_accuracy_ops, self.ops['summary']]
        if is_training:
            feed[keep_prob_ph] = self.params['out_layer_dropout_keep_prob']
            fetches.append(self.ops['train_step'])
        else:
            feed[keep_prob_ph] = 1.0

        result = self.sess.run(fetches, feed_dict=feed)
        batch_loss, batch_accuracies, batch_summary = result[:3]

        if is_training:
            summary_writer = self.train_writer
        else:
            summary_writer = self.valid_writer
        summary_writer.add_summary(batch_summary, start_step + step)

        total_loss += batch_loss * num_graphs
        weighted_accuracies.append(np.array(batch_accuracies) * num_graphs)
        print("Running %s, batch %i (has %i graphs). Loss so far: %.4f"
              % (epoch_name, step, num_graphs,
                 total_loss / processed_graphs),
              end='\r')
        steps += 1

    accuracies = np.sum(weighted_accuracies, axis=0) / processed_graphs
    mean_loss = total_loss / processed_graphs
    error_ratios = accuracies / chemical_accuracies[self.params["task_ids"]]
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return mean_loss, accuracies, error_ratios, instance_per_sec, steps
def run_epoch(self, epoch_name: str, epoch_num, data, is_training: bool):
    """Run one pass over ``data`` and return the graph-weighted mean loss.

    Every minibatch is fed ``is_generative=False`` and a fresh ``z_prior``
    sample from ``utils.generate_std_normal``. Training mode fetches the loss,
    the train step and a number of diagnostic ops/placeholders; evaluation
    mode fetches only ``mean_edge_loss`` and ``accuracy_task0``.

    Args:
        epoch_name: Label shown in the progress line.
        epoch_num: Not used by this method.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: Selects the fetch list and dropout keep probability.

    Returns:
        ``(loss, instances_per_sec)``; ``loss`` is the first fetched value of
        each batch, averaged over processed graphs.
    """
    # NOTE(review): in evaluation mode the first fetch is mean_edge_loss, so
    # the reported "loss" differs in meaning between modes -- confirm intent.
    ph, ops = self.placeholders, self.ops
    total_loss = 0
    start_time = time.time()
    processed_graphs = 0
    batches = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    for step, feed in enumerate(batches):
        num_graphs = feed[ph['num_graphs']]
        processed_graphs += num_graphs
        feed[ph['is_generative']] = False
        # Randomly sample from normal distribution
        feed[ph['z_prior']] = utils.generate_std_normal(
            self.params['batch_size'], feed[ph['num_vertices']],
            self.params['hidden_size'])

        if not is_training:
            feed[ph['out_layer_dropout_keep_prob']] = 1.0
            fetches = [ops['mean_edge_loss'], ops['accuracy_task0']]
        else:
            feed[ph['out_layer_dropout_keep_prob']] = \
                self.params['out_layer_dropout_keep_prob']
            fetches = [
                ops['loss'],
                ops['train_step'],
                ops["edge_loss"],
                ops['kl_loss'],
                ops['node_symbol_prob'],
                ph['node_symbols'],
                ops['qed_computed_values'],
                ph['target_values'],
                ops['total_qed_loss'],
                ops['mean'],
                ops['logvariance'],
                ops['grads'],
                ops['mean_edge_loss'],
                ops['mean_node_symbol_loss'],
                ops['mean_kl_loss'],
                ops['mean_total_qed_loss'],
            ]

        result = self.sess.run(fetches, feed_dict=feed)
        total_loss += result[0] * num_graphs
        print("Running %s, batch %i (has %i graphs). Loss so far: %.4f"
              % (epoch_name, step, num_graphs,
                 total_loss / processed_graphs),
              end='\r')

    mean_loss = total_loss / processed_graphs
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return mean_loss, instance_per_sec
def findRep(self, data, name, training):
    """Collect graph representations and attention values for ``data``.

    For every minibatch the encoder output, static graph representation,
    decoder state and attention values are fetched. The three representations
    are folded into running lists through ``self.finalRep``; attention values
    are written with ``self.writeAttBatch`` to
    ``att/<run_id>.<name>.dec.txt``. Finally the three representation lists
    are written with ``self.writeGrRepSVMformat`` and ``len(data)`` is printed.

    The attention file is now managed by a ``with`` block so it is closed even
    if a batch raises.

    Args:
        data: Forwarded to ``self.make_minibatch_iterator`` (non-training).
        name: Used in the attention file name and passed to the writers.
        training: Passed through to ``self.writeGrRepSVMformat``.
    """
    batch_iterator = ThreadedIterator(
        self.make_minibatch_iterator(data, False), max_queue_size=1)
    fetches = (self.placeholders['enc_output'],
               self.placeholders['static_gr_rep'],
               self.placeholders['dec_output'],
               self.placeholders['state_Dec'],
               self.placeholders['att'])

    # Per-representation running state threaded through self.finalRep:
    # (collected reps, start index, step, last seen graph).
    grReps_enc, start_idx_enc, step_enc, lastSeenGr_enc = [], 0, 0, -1
    grReps_dec, start_idx_dec, step_dec, lastSeenGr_dec = [], 0, 0, -1
    grReps_static, start_idx_static, step_static, lastSeenGr_static = \
        [], 0, 0, -1
    win = self.params["win"]
    start_att = win
    step_att = win

    with open("att/" + self.run_id + "." + name + ".dec.txt", "w") as fw:
        for batch_data in batch_iterator:
            enc_output, static_grRep, _, state_dec, batch_att = \
                self.sess.run(fetches, feed_dict=batch_data)

            grReps_enc, start_idx_enc, step_enc, lastSeenGr_enc = \
                self.finalRep(enc_output, grReps_enc, start_idx_enc,
                              step_enc, lastSeenGr_enc)
            grReps_dec, start_idx_dec, step_dec, lastSeenGr_dec = \
                self.finalRep(state_dec, grReps_dec, start_idx_dec,
                              step_dec, lastSeenGr_dec)
            (grReps_static, start_idx_static, step_static,
             lastSeenGr_static) = self.finalRep(
                 static_grRep, grReps_static, start_idx_static,
                 step_static, lastSeenGr_static)

            # Attention offsets for this batch were set by the previous one;
            # update them from the decoder state afterwards.
            self.writeAttBatch(fw, batch_att, start_att, step_att)
            start_att = start_idx_dec + self.params["win"]
            step_att = step_dec + self.params["win"]

    self.writeGrRepSVMformat(grReps_enc, data, name, "enc", training)
    self.writeGrRepSVMformat(grReps_dec, data, name, "dec", training)
    self.writeGrRepSVMformat(grReps_static, data, name, "static", training)
    print(str(len(data)))
def run_epoch(self,
              epoch_name: str,
              data,
              is_training: bool,
              start_step: int = 0):
    """Run one pass over ``data`` and return the mean loss and throughput.

    Each batch is counted as ``self.params['batch_size']`` items.

    Fixes relative to the previous version, which could not run:
      * ``precessed_data`` typo (``UnboundLocalError`` on the first batch).
      * The progress line referenced an undefined ``step``.
      * An undefined ``logits`` was fetched; its result was never used, so
        only the loss is fetched now.
      * Unused locals (reference accuracies, accuracy ops) were removed.

    Args:
        epoch_name: Label shown in the progress line.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: Forwarded to ``self.make_minibatch_iterator``.
        start_step: Not used by this method.

    Returns:
        ``(mean_loss, instances_per_sec, steps)``.
    """
    loss = 0
    start_time = time.time()
    processed_data = 0
    steps = 0
    batch_iterator = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    # NOTE(review): this opens a brand-new session instead of reusing an
    # existing one; variables are presumably uninitialised in it -- confirm
    # whether a shared session (e.g. self.sess) was intended.
    with tf.Session() as session:
        for my_step, my_batch_data in enumerate(batch_iterator):
            batch_size = self.params['batch_size']
            processed_data += batch_size
            result = session.run([self.ops['loss']], feed_dict=my_batch_data)
            batch_loss = result[0]
            loss += batch_loss * batch_size
            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f"
                  % (epoch_name, my_step, batch_size, loss / processed_data),
                  end='\r')
            steps += 1

    loss = loss / processed_data
    instance_per_sec = processed_data / (time.time() - start_time)
    return loss, instance_per_sec, steps
def findRep(self, data, name, training):
    """Collect encoder, decoder and static graph representations for ``data``.

    Each minibatch's fetched outputs are folded into running lists through
    ``self.finalRep``; the three lists are then written with
    ``self.writeGrRepSVMformat`` and ``len(data)`` is printed.

    Args:
        data: Forwarded to ``self.make_minibatch_iterator`` (non-training).
        name: Passed through to ``self.writeGrRepSVMformat``.
        training: Passed through to ``self.writeGrRepSVMformat``.
    """
    batches = ThreadedIterator(
        self.make_minibatch_iterator(data, False), max_queue_size=5)

    # Running state per representation kind, in the argument order that
    # self.finalRep takes after the batch output:
    # (collected reps, start index, step, last seen graph).
    kinds = ("enc", "dec", "static")
    state = {kind: ([], 0, 0, -1) for kind in kinds}

    for feed in batches:
        enc_output, static_grRep, _, state_dec = self.sess.run(
            (self.placeholders['enc_output'],
             self.placeholders['static_gr_rep'],
             self.placeholders['dec_output'],
             self.placeholders['state_Dec']),
            feed_dict=feed)
        batch_outputs = {"enc": enc_output,
                         "dec": state_dec,
                         "static": static_grRep}
        for kind in kinds:
            reps, start_idx, step, last_seen = self.finalRep(
                batch_outputs[kind], *state[kind])
            state[kind] = (reps, start_idx, step, last_seen)

    for kind in kinds:
        self.writeGrRepSVMformat(state[kind][0], data, name, kind, training)
    print(len(data))
def run_epoch(self, epoch_name: str, data, is_training: bool):
    """Run one pass over ``data`` and return loss and per-task metrics.

    Fix: the training fetch list previously contained ``accuracy_ops`` twice,
    so in training mode the values read as precision, recall and f1 were
    really accuracy, precision and recall. Both modes now share one fetch
    order, with ``train_step`` appended last for training.

    Args:
        epoch_name: Not used by this method.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: When true, ``train_step`` is run and the configured
            dropout keep probability is fed; otherwise 1.0 is fed.

    Returns:
        ``(loss, accuracies, precision, recall, f1, instances_per_sec)``, all
        metrics weighted by the number of graphs per batch.
    """
    task_ids = self.params['task_ids']
    accuracy_ops = [self.ops['accuracy_task%i' % t] for t in task_ids]
    precision_ops = [self.ops['precision_task%i' % t] for t in task_ids]
    recall_ops = [self.ops['recall_task%i' % t] for t in task_ids]
    f1_ops = [self.ops['f1_task%i' % t] for t in task_ids]
    keep_prob_ph = self.placeholders['out_layer_dropout_keep_prob']

    loss = 0
    accuracies, precision, recall, f1 = [], [], [], []
    start_time = time.time()
    processed_graphs = 0
    batch_iterator = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    for batch_data in batch_iterator:
        num_graphs = batch_data[self.placeholders['num_graphs']]
        processed_graphs += num_graphs

        # Metric order is fixed; train_step goes last so indices 0-4 mean the
        # same thing in both modes.
        fetch_list = [self.ops['loss'], accuracy_ops, precision_ops,
                      recall_ops, f1_ops]
        if is_training:
            batch_data[keep_prob_ph] = \
                self.params['out_layer_dropout_keep_prob']
            fetch_list.append(self.ops['train_step'])
        else:
            batch_data[keep_prob_ph] = 1.0

        result = self.sess.run(fetch_list, feed_dict=batch_data)
        (batch_loss, batch_accuracies, batch_precision, batch_recall,
         batch_f1) = result[:5]

        loss += batch_loss * num_graphs
        accuracies.append(np.array(batch_accuracies) * num_graphs)
        precision.append(np.array(batch_precision) * num_graphs)
        recall.append(np.array(batch_recall) * num_graphs)
        f1.append(np.array(batch_f1) * num_graphs)

    accuracies = np.sum(accuracies, axis=0) / processed_graphs
    precision = np.sum(precision, axis=0) / processed_graphs
    recall = np.sum(recall, axis=0) / processed_graphs
    f1 = np.sum(f1, axis=0) / processed_graphs
    loss = loss / processed_graphs
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return loss, accuracies, precision, recall, f1, instance_per_sec
def test(self, testfile):
    """Evaluate the model on ``testfile``.

    Restores ``self.best_model_file`` first when it exists, then runs the
    prediction and per-task accuracy ops over all minibatches with dropout
    disabled.

    Args:
        testfile: Passed to ``self.load_data`` with ``is_training_data=False``.

    Returns:
        ``(predictions, accuracies)``: the concatenated predictions and the
        per-task accuracies weighted by graphs per batch.
    """
    if os.path.exists(self.best_model_file):
        self.restore_model(self.best_model_file)
    # The second value returned by load_data is not used by this method.
    testdata, skipped_graphs = self.load_data(testfile,
                                              is_training_data=False)

    task_accuracy_ops = [self.ops['accuracy_task%i' % task_id]
                         for task_id in self.params['task_ids']]
    batches = ThreadedIterator(
        self.make_minibatch_iterator(testdata, False), max_queue_size=5)

    processed_graphs = 0
    collected_preds = []
    weighted_accs = []
    for feed in batches:
        num_graphs = feed[self.placeholders['num_graphs']]
        processed_graphs += num_graphs
        feed[self.placeholders['out_layer_dropout_keep_prob']] = 1.0

        batch_pred, batch_accuracies = self.sess.run(
            [self.ops['predicted_values'], task_accuracy_ops],
            feed_dict=feed)[:2]
        batch_pred = np.array(batch_pred)
        if batch_pred.ndim == 1:
            batch_pred = np.expand_dims(batch_pred, 1)
        collected_preds.append(batch_pred.T)
        weighted_accs.append(np.array(batch_accuracies) * num_graphs)

    all_preds = np.concatenate(collected_preds, 0)
    mean_accs = np.sum(weighted_accs, axis=0) / float(processed_graphs)
    return all_preds, mean_accs
def run_epoch(self, epoch_name: str, data, is_training: bool):
    """Run one pass over ``data`` and report loss, accuracies and throughput.

    Args:
        epoch_name: Label shown in the progress line.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: When true, ``train_step`` is fetched and the configured
            dropout keep probability is fed; otherwise 1.0 is fed.

    Returns:
        ``(loss, average_precision, accuracies, error_ratios,
        instances_per_sec)``. ``average_precision`` is never updated here and
        is always 0; ``error_ratios`` is ``accuracies`` divided by the
        per-task reference values selected by ``self.params["task_ids"]``.
    """
    # Per-task reference values used to normalise the accuracies.
    chemical_accuracies = np.array([
        0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
        0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
        0.004527465, 0.012292586, 0.037467458
    ])
    keep_prob_ph = self.placeholders['out_layer_dropout_keep_prob']
    task_accuracy_ops = [self.ops['accuracy_task%i' % task_id]
                         for task_id in self.params['task_ids']]

    total_loss = 0
    average_precision = 0
    weighted_accuracies = []
    start_time = time.time()
    processed_graphs = 0
    batches = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)

    for step, feed in enumerate(batches):
        num_graphs = feed[self.placeholders['num_graphs']]
        processed_graphs += num_graphs

        fetches = [self.ops['loss'], task_accuracy_ops]
        if is_training:
            feed[keep_prob_ph] = self.params['out_layer_dropout_keep_prob']
            fetches.append(self.ops['train_step'])
        else:
            feed[keep_prob_ph] = 1.0

        result = self.sess.run(fetches, feed_dict=feed)
        total_loss += result[0] * num_graphs
        weighted_accuracies.append(np.array(result[1]) * num_graphs)
        print("Running %s, batch %i (has %i graphs). Loss so far: %.4f"
              % (epoch_name, step, num_graphs,
                 total_loss / processed_graphs),
              end='\r')

    print("Num graphs proessed:", processed_graphs)
    accuracies = np.sum(weighted_accuracies, axis=0) / processed_graphs
    mean_loss = total_loss / processed_graphs
    error_ratios = accuracies / chemical_accuracies[self.params["task_ids"]]
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return (mean_loss, average_precision, accuracies, error_ratios,
            instance_per_sec)
def train(self, is_test):
    """Train the model, or evaluate it once on the validation data.

    With ``is_test`` false, runs ``self.params['num_epochs']`` epochs of
    training plus validation, saves the model whenever the summed validation
    accuracy improves, stops once the elapsed time exceeds
    ``self.params['timeout']``, and writes per-epoch rows to
    ``<online_data_backup_file>_train.txt`` / ``_valid.txt``.

    With ``is_test`` true, runs a single test-mode epoch on
    ``self.valid_data`` and writes the metrics to
    ``<online_data_backup_file>_test.txt``.

    If ``self.args['--restore']`` is set, the starting best accuracy comes
    from an initial validation pass.

    Fixes relative to the previous version:
      * The test log joined raw floats with ``"\\t".join`` (``TypeError``);
        values are now converted with ``str``.
      * The test branch iterated minibatches and then handed the last feed
        dict to ``run_epoch`` as its ``data``; it now passes
        ``self.valid_data``, as every other call here does.
      * Only the first six values of ``run_epoch`` are unpacked, so variants
        that also return TP/TN/FP/FN counts work too.
      * Unused locals were removed.

    Args:
        is_test: ``True`` for evaluation only, ``False`` for training.
    """
    total_time_start = time.time()
    summ_line = '%d\t%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f'
    header = "epoch\tfile\tloss\taccs\tprecision\trecall\tf1\tspeed\n"
    bak_train_data = []
    bak_valid_data = []

    with self.graph.as_default():
        if self.args.get('--restore') is not None:
            valid_accs = self.run_epoch("Resumed (validation)",
                                        self.valid_data, False, False)[1]
            best_val_acc = np.sum(valid_accs)
            print("\r\x1b[KResumed operation, initial cum. val. acc: %.5f"
                  % best_val_acc)
        else:
            best_val_acc = 0.

        if is_test:
            (valid_loss, valid_accs, valid_precision, valid_recall, valid_f1,
             valid_speed) = self.run_epoch("Test: ", self.valid_data, False,
                                           True)[:6]
            test_row = [self.params['valid_file'], valid_loss, valid_accs,
                        valid_precision, valid_recall, valid_f1, valid_speed]
            print("Test: %s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f"
                  % tuple(test_row))
            with open(self.online_data_backup_file + "_test.txt", "w") as f:
                f.write("file\tloss\taccs\tprecision\trecall\tf1\tspeed\n")
                f.write("\t".join(str(item) for item in test_row) + "\n")
            return

        for epoch in range(1, self.params['num_epochs'] + 1):
            (train_loss, train_accs, train_precision, train_recall, train_f1,
             train_speed) = self.run_epoch("epoch %i (training)" % epoch,
                                           self.train_data, True, False)[:6]
            (valid_loss, valid_accs, valid_precision, valid_recall, valid_f1,
             valid_speed) = self.run_epoch("epoch %i (validation)" % epoch,
                                           self.valid_data, False, False)[:6]
            epoch_time = time.time() - total_time_start

            print(summ_line % (epoch, self.params['train_file'], train_loss,
                               train_accs, train_precision, train_recall,
                               train_f1, train_speed, epoch_time))
            print(summ_line % (epoch, self.params['valid_file'], valid_loss,
                               valid_accs, valid_precision, valid_recall,
                               valid_f1, valid_speed, epoch_time))

            bak_train_data.append([
                epoch, self.params['train_file'], train_loss,
                np.sum(train_accs), np.sum(train_precision),
                np.sum(train_recall), np.sum(train_f1), train_speed
            ])
            bak_valid_data.append([
                epoch, self.params['valid_file'], valid_loss,
                np.sum(valid_accs), np.sum(valid_precision),
                np.sum(valid_recall), np.sum(valid_f1), valid_speed
            ])

            val_acc = np.sum(valid_accs)  # type: float
            if val_acc > best_val_acc:
                self.save_model(self.best_model_file)
                print("(Best epoch so far, cum. val. acc increased to %.5f "
                      "from %.5f. Saving to '%s')"
                      % (val_acc, best_val_acc, self.best_model_file))
                best_val_acc = val_acc

            # Wall-clock budget covers the whole run, not a single epoch.
            if self.params['timeout'] < epoch_time:
                print("Stopping training after %i epochs timeout." % epoch)
                break

        for suffix, rows in (("_train.txt", bak_train_data),
                             ("_valid.txt", bak_valid_data)):
            with open(self.online_data_backup_file + suffix, "w") as f:
                f.write(header)
                f.writelines("\t".join(str(item) for item in row) + "\n"
                             for row in rows)
def run_epoch(self, epoch_name: str, data, is_training: bool, is_test: bool):
    """Run one pass over ``data`` and derive metrics from confusion counts.

    For every minibatch the per-task TP/TN/FP/FN ops and the loss are
    fetched. Counts are summed over all batches and tasks; accuracy,
    precision, recall and f1 are computed from those totals. Timing
    information is appended to ``./outputs/time.log`` and printed.

    When evaluating with ``is_test`` true, the mean (axis 0) of the final
    node representations of each batch is saved to
    ``./outputs/ggnn_vector/<valid_file>_test_final_node.npy``; each batch
    overwrites the file.

    Fixes relative to the previous version:
      * Training mode now runs ``train_step``; before, both modes fetched
        the same list, so a "training" epoch never updated the model.
      * Counts and ratios are reduced with NumPy. Before, every call added
        new ``tf.reduce_sum`` / arithmetic ops to the graph and ran each one
        twice. Zero denominators still give ``nan``/``inf``, not an error.
      * The final-node representation op is built at most once per call
        instead of once per batch.

    Args:
        epoch_name: Printed once after the pass.
        data: Forwarded to ``self.make_minibatch_iterator``.
        is_training: Runs the optimiser and feeds the configured dropout
            keep probability when true; feeds 1.0 otherwise.
        is_test: Enables saving of final node representations (evaluation
            mode only).

    Returns:
        ``(loss, accuracy, precision, recall, f1, instances_per_sec,
        TP, TN, FP, FN)``.
    """
    init_begin_time = time.time()
    task_ids = self.params['task_ids']
    TP_ops = [self.ops['TP%i' % task_id] for task_id in task_ids]
    TN_ops = [self.ops['TN%i' % task_id] for task_id in task_ids]
    FP_ops = [self.ops['FP%i' % task_id] for task_id in task_ids]
    FN_ops = [self.ops['FN%i' % task_id] for task_id in task_ids]
    keep_prob_ph = self.placeholders['out_layer_dropout_keep_prob']

    loss = 0
    TP_all, TN_all, FP_all, FN_all = [], [], [], []
    start_time = time.time()
    processed_graphs = 0
    batch_iterator = ThreadedIterator(
        self.make_minibatch_iterator(data, is_training), max_queue_size=5)
    init_end_time = time.time()

    step_num = 0
    run_begin_time = run_end_time = init_end_time
    final_node_op = None  # built lazily, only if representations are saved

    for step, batch_data in enumerate(batch_iterator):
        num_graphs = batch_data[self.placeholders['num_graphs']]
        processed_graphs += num_graphs
        run_begin_time = time.time()

        # train_step goes last so indices 0-4 mean the same in both modes.
        fetch_list = [TP_ops, TN_ops, FP_ops, FN_ops, self.ops['loss']]
        if is_training:
            batch_data[keep_prob_ph] = \
                self.params['out_layer_dropout_keep_prob']
            fetch_list.append(self.ops['train_step'])
        else:
            batch_data[keep_prob_ph] = 1.0

        result = self.sess.run(fetch_list, feed_dict=batch_data)
        run_end_time = time.time()
        step_num = step

        if not is_training and is_test:
            if final_node_op is None:
                final_node_op = self.compute_final_node_representations()
            final_node = self.sess.run(final_node_op, feed_dict=batch_data)
            np.save(
                './outputs/ggnn_vector/{}_test_final_node.npy'.format(
                    self.valid_file),
                np.mean(final_node, 0))

        TP_batch, TN_batch, FP_batch, FN_batch, batch_loss = result[:5]
        TP_all.append(TP_batch)
        TN_all.append(TN_batch)
        FP_all.append(FP_batch)
        FN_all.append(FN_batch)
        loss += batch_loss * num_graphs

    result_begin_time = time.time()
    TP = np.sum(TP_all)
    TN = np.sum(TN_all)
    FP = np.sum(FP_all)
    FN = np.sum(FN_all)
    print('TP: ', TP, '\tTN: ', TN, '\tFP: ', FP, '\tFN: ', FN)

    loss = loss / processed_graphs
    instance_per_sec = processed_graphs / (time.time() - start_time)

    # Empty classes give zero denominators; keep nan/inf results quiet.
    with np.errstate(divide='ignore', invalid='ignore'):
        accuracy_value = np.true_divide(TP + TN, TP + TN + FP + FN)
        precision_value = np.true_divide(TP, TP + FP)
        recall_value = np.true_divide(TP, TP + FN)
        f1_value = np.true_divide(2 * precision_value * recall_value,
                                  precision_value + recall_value)
    print(epoch_name)
    accuracies = float(accuracy_value)
    precision = float(precision_value)
    recall = float(recall_value)
    f1 = float(f1_value)
    result_end_time = time.time()

    out_str = ('init_time: {}\trun_once_time:{}\tloop_num:{}\t'
               'compute_result_time: {}').format(
                   init_end_time - init_begin_time,
                   run_end_time - run_begin_time,
                   step_num,
                   result_end_time - result_begin_time)
    with open('./outputs/time.log', 'a') as f:
        f.write('---------- run epoch: ' + str(int(run_begin_time)) +
                '----------\n')
        f.write(out_str + '\n')
    print(out_str)

    return (loss, accuracies, precision, recall, f1, instance_per_sec,
            TP, TN, FP, FN)
def run_epoch(self, epoch_name: str, data, is_training: bool, start_step: int = 0):
    """Run one epoch over ``data`` and aggregate loss, accuracies and LAS/UAS scores.

    Each minibatch is evaluated with one ``self.sess.run`` call. The
    ``train_step`` op is added to the fetches only when ``is_training`` is
    true. Unless ``self.params['is_test']`` is set, the batch summary is added
    to ``self.train_writer`` (training) or ``self.valid_writer`` (otherwise)
    at step ``start_step + step``. In test mode a CSV file is opened at
    ``self.test_results_file``, a header row is written, and the open handle
    is passed to ``self.humanize_batch_results`` as ``out_file``; the file is
    closed when the epoch ends, even on error.

    Args:
        epoch_name: Label printed in the progress line.
        data: Dataset forwarded to ``self.make_minibatch_iterator``.
        is_training: Selects the training fetches and the configured
            ``out_layer_dropout_keep_prob`` (1.0 is fed otherwise).
        start_step: Offset added to the batch index when writing summaries.

    Returns:
        A 17-tuple, in this order: ``loss``, ``accuracies``, ``error_ratios``,
        ``instance_per_sec``, ``steps``, ``acc_las``, ``acc_uas``,
        ``all_labels``, ``all_computed_values``, ``all_num_vertices``,
        ``all_masks``, ``all_ids``, ``all_adj_m``, ``all_labels_e``,
        ``all_computed_values_e``, ``all_masks_e``, ``acc_uas_e``.
        Scalar metrics are weighted by the number of graphs per batch; the
        ``all_*`` entries are lists holding one fetched value per batch.

    Raises:
        Exception: If reading the word inputs or ``humanize_batch_results``
            fails for a batch. The original error is chained as ``__cause__``.
    """
    # Per-task reference values; accuracies are divided by the entries
    # selected by params['task_ids'] to obtain error_ratios.
    chemical_accuracies = np.array([
        0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
        0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
        0.004527465, 0.012292586, 0.037467458
    ])

    accuracy_ops = [
        self.ops['accuracy_task%i' % task_id]
        for task_id in self.params['task_ids']
    ]

    # The fetches do not depend on the batch, so they are assembled once.
    # NOTE(review): some fetched values (loss_edges, losses,
    # final_node_representations, word_inputs, word_embeddings,
    # emb_dropout_keep_prob) are not consumed below; the fetch list is kept
    # intact so the session call is unchanged -- confirm before trimming.
    named_fetches = [
        ('loss', self.ops['loss']),
        ('accuracy_ops', accuracy_ops),
        ('summary', self.ops['summary']),
        ('loss_edges', self.ops['loss_edges']),
        ('labels', self.ops['labels']),
        ('computed_values', self.ops['computed_values']),
        ('final_node_representations', self.ops['final_node_representations']),
        ('node_mask', self.ops['node_mask']),
        ('losses', self.ops['losses']),
        ('edge_weights', self.weights['edge_weights']),
        ('edge_biases', self.weights['edge_biases']),
        ('num_vertices', self.placeholders['num_vertices']),
        ('adjacency_matrix', self.placeholders['adjacency_matrix']),
        ('sentences_id', self.placeholders['sentences_id']),
        ('word_inputs', self.ops['word_inputs']),
        ('target_pos', self.placeholders['target_pos']),
        ('computed_values_edges', self.ops['computed_values_edges']),
        ('labels_edges', self.placeholders['target_values_edges']),
        ('node_mask_edges', self.placeholders['node_mask_edges']),
        ('word_embeddings', self.weights['word_embeddings']),
        ('emb_dropout_keep_prob', self.placeholders['emb_dropout_keep_prob']),
    ]
    fetch_names = [name for name, _ in named_fetches]
    fetch_list = [op for _, op in named_fetches]
    keep_prob_placeholder = self.placeholders['out_layer_dropout_keep_prob']
    if is_training:
        keep_prob = self.params['out_layer_dropout_keep_prob']
        # Appended last so the positions of the named fetches are unaffected.
        fetch_list.append(self.ops['train_step'])
    else:
        # Not training: train_step is not requested and dropout is disabled.
        keep_prob = 1.0

    loss = 0
    accuracies = []
    start_time = time.time()
    processed_graphs = 0
    steps = 0
    acc_las, acc_uas = 0, 0
    acc_uas_e = 0
    batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
        data, is_training), max_queue_size=5)
    all_labels, all_labels_e = [], []
    all_computed_values, all_computed_values_e = [], []
    all_num_vertices, all_masks, all_masks_e = [], [], []
    all_ids, all_adj_m = [], []

    is_test = self.params.get('is_test')
    if is_test:
        summary_writer = None
        csv_file = open(self.test_results_file, 'w', newline='')
    else:
        summary_writer = self.train_writer if is_training else self.valid_writer
        csv_file = None

    try:
        if csv_file is not None:
            # NOTE(review): 'result_dep' appears twice in the header; the
            # second occurrence may be a typo -- confirm against
            # humanize_batch_results before changing it.
            row_headers = [
                'loc', 'token', 'LAS', 'UAS', 'label_acc', 'POS', 'dep',
                'dep_l', 'head', 'head_token', 'head_POS', 'head_dep',
                'head_dep_l', 'target_head', 'target_pos', 'target_dep',
                'target_dep_l', 'result_head', 'result_dep', 'result_dep',
                'active_nodes'
            ]
            csv.writer(csv_file).writerow(row_headers)

        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            batch_data[keep_prob_placeholder] = keep_prob

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            # zip() stops at the shorter sequence, so the trailing train_step
            # result (present only when training) is ignored here.
            fetched = dict(zip(fetch_names, result))

            batch_loss = fetched['loss']
            batch_accuracies = fetched['accuracy_ops']
            labels = fetched['labels']
            computed_values = fetched['computed_values']
            node_mask = fetched['node_mask']
            num_vertices = fetched['num_vertices']
            adjacency_matrix = fetched['adjacency_matrix']
            sentences_id = fetched['sentences_id']
            computed_values_edges = fetched['computed_values_edges']
            labels_edges = fetched['labels_edges']
            node_mask_edges = fetched['node_mask_edges']

            if summary_writer is not None:
                summary_writer.add_summary(fetched['summary'], start_step + step)

            loss += batch_loss * num_graphs
            accuracies.append(np.array(batch_accuracies) * num_graphs)

            try:
                # The fed word inputs are used here, not the fetched op value.
                word_inputs = batch_data[self.placeholders['word_inputs']]
                las, uas, uas_e = self.humanize_batch_results(
                    labels=labels,
                    computed_values=computed_values,
                    num_vertices=num_vertices,
                    mask=node_mask,
                    ids=sentences_id,
                    adms=adjacency_matrix,
                    labels_e=labels_edges,
                    computed_values_e=computed_values_edges,
                    mask_edges=node_mask_edges,
                    word_inputs=word_inputs,
                    target_pos=fetched['target_pos'],
                    out_file=csv_file)
            except Exception as err:
                # Dump the edge parameters for diagnosis, then re-raise with
                # the original failure chained so the traceback is not lost.
                print('edge weights: %s' % fetched['edge_weights'])
                print('edge bias: %s' % fetched['edge_biases'])
                raise Exception('Apparent division by zero, comp_values: %s'
                                % computed_values[0]) from err
            else:
                acc_las += las * num_graphs
                acc_uas += uas * num_graphs
                acc_uas_e += uas_e * num_graphs

            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" %
                  (epoch_name, step, num_graphs, loss / processed_graphs),
                  end='\r')
            steps += 1

            all_labels.append(labels)
            all_labels_e.append(labels_edges)
            all_computed_values.append(computed_values)
            all_computed_values_e.append(computed_values_edges)
            all_num_vertices.append(num_vertices)
            all_masks.append(node_mask)
            all_masks_e.append(node_mask_edges)
            all_ids.append(sentences_id)
            all_adj_m.append(adjacency_matrix)
    finally:
        # Always release the results file, including when a batch fails.
        if csv_file is not None:
            csv_file.close()

    accuracies = np.sum(accuracies, axis=0) / processed_graphs
    loss = loss / processed_graphs
    error_ratios = accuracies / chemical_accuracies[self.params["task_ids"]]
    instance_per_sec = processed_graphs / (time.time() - start_time)
    acc_las = acc_las / processed_graphs
    acc_uas = acc_uas / processed_graphs
    acc_uas_e = acc_uas_e / processed_graphs

    return loss, accuracies, error_ratios, instance_per_sec, steps, acc_las, acc_uas, \
        all_labels, all_computed_values, all_num_vertices, all_masks, \
        all_ids, all_adj_m, all_labels_e, all_computed_values_e, all_masks_e, acc_uas_e
def run_epoch(self, epoch_name: str, data, is_training: bool):
    """Run one epoch over ``data``, printing per-batch diagnostics.

    For every minibatch the ``sigm_*`` diagnostic ops are evaluated first, in
    a single ``self.sess.run`` call, so all printed values come from the same
    pass. A second call then fetches the loss and per-task accuracies, plus
    ``train_step`` when ``is_training`` is true.

    Args:
        epoch_name: Label printed in the progress line.
        data: Dataset forwarded to ``self.make_minibatch_iterator``.
        is_training: Selects the training fetches and the configured
            ``out_layer_dropout_keep_prob`` (1.0 is fed otherwise).

    Returns:
        ``(loss, accuracies, error_ratios, instance_per_sec)``. ``loss`` and
        ``accuracies`` are weighted by graphs per batch and divided by the
        total number of processed graphs; ``error_ratios`` is ``accuracies``
        divided by the reference values selected by ``params['task_ids']``.
    """
    # Per-task reference values used as the denominator of error_ratios.
    chemical_accuracies = np.array([
        0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
        0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
        0.004527465, 0.012292586, 0.037467458
    ])

    loss = 0
    accuracies = []
    start_time = time.time()
    processed_graphs = 0

    accuracy_ops = [
        self.ops['accuracy_task%i' % task_id]
        for task_id in self.params['task_ids']
    ]

    # Fetches and the dropout value are the same for every batch.
    keep_prob_placeholder = self.placeholders['out_layer_dropout_keep_prob']
    if is_training:
        keep_prob = self.params['out_layer_dropout_keep_prob']
        fetch_list = [self.ops['loss'], accuracy_ops, self.ops['train_step']]
    else:
        keep_prob = 1.0
        fetch_list = [self.ops['loss'], accuracy_ops]

    # 'sigm_c' is still fetched (its value is not printed) so the set of
    # evaluated ops stays the same as before.
    diagnostic_ops = [
        self.ops['sigm_c'], self.ops['sigm_TP'], self.ops['sigm_FN'],
        self.ops['sigm_FP'], self.ops['sigm_TN'], self.ops['sigm_sum'],
        self.ops['sigm_Recall'], self.ops['sigm_Precision'],
        self.ops['sigm_F1'], self.ops['sigm_FPR']
    ]

    batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
        data, is_training), max_queue_size=5)
    for step, batch_data in enumerate(batch_iterator):
        num_graphs = batch_data[self.placeholders['num_graphs']]
        processed_graphs += num_graphs
        batch_data[keep_prob_placeholder] = keep_prob

        # Diagnostics run before the (possibly weight-updating) main call.
        (_, val_tp, val_fn, val_fp, val_tn, val_sum,
         val_recall, val_precision, val_f1, val_fpr) = self.sess.run(
             diagnostic_ops, feed_dict=batch_data)

        result = self.sess.run(fetch_list, feed_dict=batch_data)
        batch_loss, batch_accuracies = result[0], result[1]
        loss += batch_loss * num_graphs
        accuracies.append(np.array(batch_accuracies) * num_graphs)

        print("random seed: {}".format(self.random_seed))
        print("sum: {}".format(val_sum))
        print("TP: {}".format(val_tp))
        print("FN: {}".format(val_fn))
        print("FP: {}".format(val_fp))
        print("TN: {}".format(val_tn))
        print("Recall: {}".format(val_recall))
        print("Precision: {}".format(val_precision))
        print("F1: {}".format(val_f1))
        print("FPR: {}".format(val_fpr))

        print("Running %s, batch %i (has %i graphs). "
              "Loss so far: %.4f" % (epoch_name, step, num_graphs,
                                     loss / processed_graphs),
              end='\r')

    accuracies = np.sum(accuracies, axis=0) / processed_graphs
    loss = loss / processed_graphs
    error_ratios = accuracies / chemical_accuracies[self.params["task_ids"]]
    instance_per_sec = processed_graphs / (time.time() - start_time)
    return loss, accuracies, error_ratios, instance_per_sec