def run_evaluate(self, sess, test, vocab_tags, vocab_words):
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for sentences, labels in minibatches(test, vocab_tags, vocab_words,
                                         self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, sentences)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return {"acc": 100 * acc, "f1": 100 * f1,
            "precision": 100 * p, "recall": 100 * r}
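
# Every evaluator in this file relies on a `get_chunks(seq, tags)` helper that
# decodes a sequence of tag ids into (chunk_type, start, end) spans. A minimal
# sketch, assuming a BIO tagging scheme with an "O" tag and "B-"/"I-" prefixes;
# this is an illustration of the idea, not necessarily the exact helper each
# snippet imports (some variants below use tags like "B_PS" instead).
def get_chunks(seq, tags):
    """Return a list of (chunk_type, chunk_start, chunk_end) spans, end-exclusive."""
    default = tags["O"]
    idx_to_tag = {idx: tag for tag, idx in tags.items()}
    chunks = []
    chunk_type, chunk_start = None, None
    for i, tok in enumerate(seq):
        if tok == default:
            # an outside tag closes any open chunk
            if chunk_type is not None:
                chunks.append((chunk_type, chunk_start, i))
                chunk_type, chunk_start = None, None
        else:
            prefix, _, tok_type = idx_to_tag[tok].partition("-")
            if chunk_type is None:
                chunk_type, chunk_start = tok_type, i
            elif tok_type != chunk_type or prefix == "B":
                # type change or a fresh B- tag: close the old chunk, open a new one
                chunks.append((chunk_type, chunk_start, i))
                chunk_type, chunk_start = tok_type, i
    if chunk_type is not None:
        chunks.append((chunk_type, chunk_start, len(seq)))
    return chunks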
def evaluate(self, test):
    accuracy = []
    correct_prediction = 0.
    total_correct = 0.
    total_prediction = 0.
    for word, label in minibatches(test, self.config.batch_size):
        label_predict, seq_len = self.predict_batch(word)
        for lb, lb_pred, length in zip(label, label_predict, seq_len):
            lb = lb[:length]
            lb_pred = lb_pred[:length]
            accuracy += [a == b for (a, b) in zip(lb, lb_pred)]
            lb_chunks = set(get_chunks(lb, self.config.vocab_tag))
            lb_pred_chunks = set(get_chunks(lb_pred, self.config.vocab_tag))
            correct_prediction += len(lb_chunks & lb_pred_chunks)
            total_prediction += len(lb_pred_chunks)
            total_correct += len(lb_chunks)
    precision = correct_prediction / total_prediction if correct_prediction > 0 else 0
    recall = correct_prediction / total_correct if correct_prediction > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if correct_prediction > 0 else 0
    acc = np.mean(accuracy)
    return {"accuracy": 100 * acc, "f1-score": 100 * f1}
def run_evaluate(self, sess, test, tags):
    """Evaluates performance on the test set.

    Args:
        sess: tensorflow session
        test: dataset that yields tuples of (sentences, tags)
        tags: {tag: index} dictionary

    Returns:
        accuracy, f1 score
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, labels in minibatches(test, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, words)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, f1
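
# The same entity-level metric arithmetic recurs in every variant in this
# file: precision = TP / predicted spans, recall = TP / gold spans, F1 = their
# harmonic mean. Pulled out as a standalone sketch (the function name is
# illustrative, not from any of the snippets):
def chunk_prf1(correct_preds, total_preds, total_correct):
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    return p, r, f1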
def run_evaluate(self, sess, test, tags, target='src'):
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    nbatches = (len(test) + self.args.batch_size - 1) // self.args.batch_size
    prog = Progbar(target=nbatches)
    for i, (words, labels, target_words) in enumerate(
            minibatches(test, self.args.batch_size)):
        if target == 'src':
            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, mode=target, is_training=False)
        else:
            labels_pred, sequence_lengths = self.predict_batch(
                sess, None, words, mode=target, is_training=False)
        for lab, label_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = label_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
        prog.update(i + 1)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, p, r, f1
def run_evaluate(self, test, log_step=None, mode='train'):
    """Evaluates performance on the test set.

    Args:
        test: dataset that yields tuples of (sentences, tags)
        log_step: step index used when writing dev summaries
        mode: 'train' or 'evaluate'

    Returns:
        metrics: (dict) metrics["acc"] = 98.4, ...
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    # validation loss is only computed when it drives early stopping
    get_loss = self.config.early_stopping_metric == 'loss'
    if get_loss:
        loss = 0.0
        weight = 0.0
    for words, labels, pred_flags in minibatches(test, self.config.batch_size):
        if get_loss:
            labels_pred, sequence_lengths, batch_loss = self.predict_batch(
                words, labels=labels, pred_flags=pred_flags, get_loss=get_loss)
            # weight each batch loss by its (possibly partial) batch size
            _weight = len(sequence_lengths) / float(self.config.batch_size)
            weight += _weight
            loss += _weight * batch_loss
        else:
            labels_pred, sequence_lengths = self.predict_batch(
                words, get_loss, pred_flags=pred_flags)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, self.config.vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, self.config.vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    if get_loss and mode == 'train':
        loss = loss / weight
        dev_summary = self.sess.run(
            self.dev_merged,
            feed_dict={self.eval_loss: loss, self.eval_f1: f1})
        self.dev_file_writer.add_summary(dev_summary, log_step)
        return {"acc": 100 * acc, "f1": 100 * f1, "loss": loss}
    if mode == 'evaluate':
        dataset_name = basename(normpath(test.filename))
        self.save_evaluation_results(dataset_name, f1)
    return {"acc": 100 * acc, "f1": 100 * f1}
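
# The dev loss above is a weighted average over (possibly partial) batches:
#     loss = sum_i (n_i / B) * loss_i / sum_i (n_i / B)
# where n_i is the number of sequences in batch i and B the nominal batch
# size. A minimal standalone check (all values illustrative):
batch_losses = [0.50, 0.40, 0.80]  # per-batch mean losses
batch_sizes = [32, 32, 7]          # last batch is partial
B = 32
weights = [n / float(B) for n in batch_sizes]
loss = sum(w * l for w, l in zip(weights, batch_losses)) / sum(weights)
# equivalent to a sequence-count-weighted mean of the batch losses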
def run_evaluate(self, sess, test, tags):
    """Evaluates performance on the test set.

    Args:
        sess: tensorflow session
        test: dataset that yields tuples of (sentences, tags)
        tags: {tag: index} dictionary

    Returns:
        accuracy, f1 score
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, labels, iob_gold, mention_type_gold, mentions_gold, \
            word_features in minibatches(test, self.config.batch_size):
        iob_labels_pred, sequence_lengths = self.predict_iob_batch(
            sess, words, word_features)
        mentions = []
        mention_sizes = []
        count = 0
        for i in range(self.config.batch_size):
            length = sequence_lengths[i]
            mention = find_mentions(iob_labels_pred[i][:length])
            mentions.append(mention)
            mention_sizes.append(len(mention))
            if len(mention) == 0:
                count += 1
        # only run the type classifier if at least one mention was found
        if count != self.config.batch_size:
            mentions_pred, _ = self.predict_type_batch(
                sess, words, word_features, mentions)
        else:
            mentions_pred = [[]] * self.config.batch_size
        for lab, iob_pred, length, mention, mention_pred, mention_size in zip(
                labels, iob_labels_pred, sequence_lengths, mentions,
                mentions_pred, mention_sizes):
            lab = lab[:length]
            iob_pred = iob_pred[:length]
            mention_pred = mention_pred[:mention_size]
            lab_pred = find_labels(iob_pred, mention_pred, tags)
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, f1
def run_evaluate(self, sess, test, tags):
    """Evaluates performance on the test set.

    Args:
        sess: tensorflow session
        test: dataset that yields tuples of (sentences, tags)
        tags: {tag: index} dictionary

    Returns:
        accuracy, f1 score
    """
    accs = []
    global Globepoch
    Globepoch += 1
    if Globepoch >= 8:
        OutFile = open("Res1/AWS_GPU_BEST_" + str(Globepoch), 'w')
    correct_preds, total_correct, total_preds = 0., 0., 0.
    # here the raw words and tags from main.py are converted to word and tag ids
    for words, labels in minibatches(test, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, words)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            test2lab = label2ind_ret()
            # from epoch 8 onward, dump the predicted tag sequences for inspection
            if Globepoch >= 8:
                for lab1 in lab_pred:
                    OutFile.write(test2lab[lab1] + "\n")
                OutFile.write("\n")
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, f1
def run_evaluate(self, test, print_or_not=False):
    accs = []
    intent_correct = 0
    intent_total = 0
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, labels, intents, all_tags in minibatches(test, self.config.batch_size):
        labels_pred, sequence_lengths, pred_intents, score = self.predict_batch(words, all_tags)
        for word_ins, lab, lab_pred, length, intent, pred_intent in \
                zip(words, labels, labels_pred, sequence_lengths, intents, pred_intents):
            if print_or_not:
                words_list = [self.config.idx2vocab[a] for a in word_ins]
                lab_list = [self.config.idx2tag[a] for a in lab]
                lab_pred_list = [self.config.idx2tag[a] for a in lab_pred]
                print("||".join(words_list) + "\t" + "||".join(lab_list)
                      + "\t" + "||".join(lab_pred_list) + "\t"
                      + str(self.config.idx2intent[intent]) + "\t"
                      + str(self.config.idx2intent[pred_intent]))
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, self.config.vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, self.config.vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
            intent_total += 1
            if pred_intent == intent:
                intent_correct += 1
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    if intent_total != 0:
        intent_acc = intent_correct / float(intent_total)
    else:
        intent_acc = 0
    return {"acc": 100 * acc, "f1": 100 * f1, "intent_acc": 100 * intent_acc,
            "intent_correct": intent_correct, "intent_total": intent_total}
def run_evaluate(self, sess, test, tags):
    """Evaluates performance on the test set.

    Args:
        sess: tensorflow session
        test: dataset that yields tuples of (sentences, tags)
        tags: {tag: index} dictionary

    Returns:
        accuracy, f1 score
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    output_file = codecs.open("output", 'w', 'UTF-8')
    idx_to_tag = {idx: tag for tag, idx in tags.items()}
    for words, labels in minibatches(test, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, words)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
            # write "gold predicted" tag pairs, one token per line
            output_string = ""
            for b, c in zip(lab, lab_pred):
                split_line = [idx_to_tag[b], idx_to_tag[c]]
                output_string += ' '.join(split_line) + '\n'
            output_file.write(output_string + '\n')
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    output_file.close()
    return acc, f1
def run_evaluate(self, sess, test, tags, test_flag):
    """Evaluates performance on the test set.

    Args:
        sess: tensorflow session
        test: dataset that yields tuples of (sentences, tags)
        tags: {tag: index} dictionary

    Returns:
        accuracy, f1 score, precision, recall
    """
    # load the gazetteer tries used for dictionary features
    self.lis1 = []
    self.lis2 = []
    self.lis3 = []
    self.lis4 = []
    self.lis5 = []
    trie.gazette(self.lis1, "data/dic/gazette.txt")
    trie.gazette(self.lis2, "data/dic/thres3.txt")
    trie.gazette_DTTI(self.lis3, "data/dic/DT_analysis.txt")
    trie.gazette_DTTI(self.lis4, "data/dic/TI_analysis.txt")
    trie.gazette(self.lis5, "data/dic/wiki_PS.txt")

    fresult = open("results/result.txt", "w")
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, fw_words, bw_words, labels, postags, sentences, print_line in \
            minibatches(test, self.config.batch_size):
        dict_labels = self.dict_trie(sentences)
        labels_pred, sequence_lengths = self.predict_batch(
            sess, words, fw_words, bw_words, dict_labels, labels,
            print_line, test_flag)
        line_num = 0
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            if test_flag == 1:
                # dump predicted chunks in surface order, mapping B_* tags
                # to their bare entity classes
                fresult.write(print_line[line_num][0] + '\n')
                print_chunks = sorted(lab_pred_chunks, key=lambda chunk: chunk[1])
                for tag, start, end in print_chunks:
                    tag = tag.decode()
                    if tag in ('B_PS', 'B_LC', 'B_DT', 'B_TI', 'B_OG'):
                        print_tag = tag[2:]
                    else:
                        print_tag = tag
                    fresult.write(print_line[line_num][start + 1].split()[1]
                                  + '\t' + print_tag + '\n')
                fresult.write('\n')
            line_num += 1
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, f1, p, r
def evaluate(args, model, tokenizer, labels, pad_token_label_id, best,
             mode, prefix="", verbose=True):
    eval_dataset = load_and_cache_examples(args, tokenizer, labels,
                                           pad_token_label_id, mode=mode)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 \
        else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info("***** Running evaluation %s *****", prefix)
    if verbose:
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3],
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment ids
                inputs["token_type_ids"] = (
                    batch[2] if args.model_type in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            if args.n_gpu > 1:
                tmp_eval_loss = tmp_eval_loss.mean()
            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(
                out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=2)
    label_map = {i: label for i, label in enumerate(labels)}

    # strip padding positions, keeping label strings and ids per sequence
    preds_list = [[] for _ in range(out_label_ids.shape[0])]
    out_id_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_id_list = [[] for _ in range(out_label_ids.shape[0])]
    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                preds_list[i].append(label_map[preds[i][j]])
                out_id_list[i].append(out_label_ids[i][j])
                preds_id_list[i].append(preds[i][j])

    # chunk-level counts
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for ground_truth_id, predicted_id in zip(out_id_list, preds_id_list):
        # get_chunks extracts the gold and predicted entity spans
        lab_chunks = set(get_chunks(ground_truth_id, tag_to_id(args.data_dir)))
        lab_pred_chunks = set(get_chunks(predicted_id, tag_to_id(args.data_dir)))
        correct_preds += len(lab_chunks & lab_pred_chunks)
        total_preds += len(lab_pred_chunks)
        total_correct += len(lab_chunks)

    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    new_F = 2 * p * r / (p + r) if correct_preds > 0 else 0

    is_updated = False
    if new_F > best[-1]:
        best = [p, r, new_F]
        is_updated = True

    results = {
        "loss": eval_loss,
        "precision": p,
        "recall": r,
        "f1": new_F,
        "best_precision": best[0],
        "best_recall": best[1],
        "best_f1": best[-1],
    }

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list, best, is_updated
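
# Hypothetical call site for the transformers-style evaluate() above; `best`
# tracks the running best [precision, recall, f1] and is threaded through the
# calls (args/model/tokenizer/labels are assumed to exist as in that script):
best = [0.0, 0.0, 0.0]
results, preds_list, best, is_updated = evaluate(
    args, model, tokenizer, labels, pad_token_label_id,
    best, mode="dev", prefix="epoch-1")
if is_updated:
    # persist the checkpoint when dev F1 improves (sketch)
    model.save_pretrained(args.output_dir)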
def run_evaluate(self, sess, test, tags):
    """Evaluates performance on the test set.

    Args:
        sess: tensorflow session
        test: dataset that yields tuples of (sentences, tags)
        tags: {tag: index} dictionary

    Returns:
        accuracy, f1 score
    """
    accs = []
    f = open('/home/chiyoon/python/seq/sequence_tagging/predict/prediction.txt', 'w')
    f2 = open('/home/chiyoon/python/seq/sequence_tagging/predict/output.txt', 'w')
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, labels, pos, inputs, position in minibatches(
            test, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, words, pos)
        for lab, lab_pred, length, wo, idx in zip(labels, labels_pred,
                                                  sequence_lengths, inputs, position):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            _label = []
            _predict = []
            # drop tokens whose input is tagged "BL"
            skip_lab = []
            skip_pred = []
            skip_w = []
            skip_idx = []
            for (w, id, label, pred) in zip(wo, idx, lab, lab_pred):
                _w = w.split("/")[1]
                if _w != "BL":
                    skip_w.append(w)
                    skip_lab.append(label)
                    skip_pred.append(pred)
                    skip_idx.append(id)
            lab = skip_lab
            lab_pred = skip_pred
            idx_to_tag = {idx: tag for tag, idx in tags.items()}
            for (label, pred) in zip(skip_lab, skip_pred):
                _label.append(idx_to_tag[label])
                _predict.append(idx_to_tag[pred])
            # per-token comparison, marking mismatches
            for (w, po, pre, label) in zip(skip_w, skip_idx, _predict, _label):
                if pre == label:
                    f2.write("{}\t{}\t{}\n".format(w, pre, label))
                else:
                    f2.write("{}\t{}\t{} <<<<<<<<<<<<<<<<<<<<<<<<<<\n".format(
                        w, pre, label))
            f2.write("\n")
            # final predictions
            for (w, po, pre, label) in zip(skip_w, skip_idx, _predict, _label):
                word = "".join(w.split("/")[0:-1])
                f.write("{}\t{}\t{}\n".format(word, pre, po))
            f.write("\n")
            accs += [a == b for (a, b) in zip(skip_lab, skip_pred)]
            lab_chunks = set(get_chunks(skip_lab, tags))
            lab_pred_chunks = set(get_chunks(skip_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    # note: these only create summary ops; nothing is written without a
    # FileWriter and a merged-summary run
    tf.summary.scalar("accuracy", acc)
    tf.summary.scalar("f1", f1)
    f2.write("f1 score : {}".format(f1))
    print("precision : {}".format(p))
    print("recall : {}".format(r))
    print("f1 score : {}".format(f1))
    return acc, f1
def fitness(molecules_here, properties_calc_ls, discriminator, disc_enc_type,
            generation_index, max_molecules_len, device, num_processors,
            writer, beta, data_dir, max_fitness_collector,
            impose_time_adapted_pen):
    '''Calculate the fitness of a population.

    All properties are standardized based on the mean & std of ZINC.

    Parameters:
        molecules_here (list)          : SMILES strings of the population
        properties_calc_ls             : Properties to be shown to the discriminator
        discriminator (torch.Model)    : PyTorch classifier
        disc_enc_type (string)         : Type of encoding shown to the discriminator
        generation_index (int)         : Index of the generation
        max_molecules_len (int)        : Largest molecule length
        device (string)                : Device for the neural network
        num_processors (int)           : Number of CPUs
        writer (tensorboardX writer)   : Tensorboard writer
        beta (int)                     : Discriminator weight
        data_dir (str)                 : Data directory
        max_fitness_collector (list)   : List of max fitness values
        impose_time_adapted_pen (bool) : Impose distribution shift with the discriminator

    Returns:
        fitness (np.array)                   : Combination of properties and discriminator score
        discriminator_predictions (np.array) : Predictions made by the discriminator
    '''
    dataset_x = du.get_dis_encoding(molecules_here, disc_enc_type,
                                    max_molecules_len, num_processors,
                                    generation_index)
    if generation_index == 1:
        discriminator_predictions = np.zeros((len(dataset_x), 1))
    else:
        discriminator_predictions = D.predict(discriminator, dataset_x, device)

    if properties_calc_ls is None:
        fitness = discriminator_predictions
    else:
        # compute each property in parallel over chunks of unique molecules
        molecules_here_unique = list(set(molecules_here))
        ratio = len(molecules_here_unique) / num_processors
        chunks = du.get_chunks(molecules_here_unique, num_processors, ratio)
        chunks = [item for item in chunks if len(item) >= 1]

        logP_results, SAS_results, ringP_results, QED_results = {}, {}, {}, {}
        if 'logP' in properties_calc_ls:
            logP_results = create_parr_process(chunks, 'logP')
        if 'SAS' in properties_calc_ls:
            SAS_results = create_parr_process(chunks, 'SAS')
        if 'RingP' in properties_calc_ls:
            ringP_results = create_parr_process(chunks, 'RingP')
        if 'QED' in properties_calc_ls:
            QED_results = {}
            for smi in molecules_here:
                QED_results[smi] = Chem.QED.qed(Chem.MolFromSmiles(smi))

        logP_calculated, SAS_calculated, RingP_calculated, \
            logP_norm, SAS_norm, RingP_norm, QED_results = standardize_properties(
                molecules_here, logP_results, SAS_results, ringP_results,
                QED_results, properties_calc_ls)

        # property-only fitness: reward logP, penalize SAS and ring penalty
        fitness = logP_norm - SAS_norm - RingP_norm
        writer.add_scalar('max fitness without discr', max(fitness),
                          generation_index)
        writer.add_scalar('avg fitness without discr', fitness.mean(),
                          generation_index)
        max_fitness_collector.append(max(fitness)[0])

        # use the discriminator to shift the distribution of the population
        if impose_time_adapted_pen:
            if generation_index > 100:
                # if max fitness has been flat for 5 generations, raise beta
                if len(set(max_fitness_collector[-5:])) == 1:
                    beta = 1000
                    print('Beta cutoff imposed index: ', generation_index)
                    f = open('{}/beta_change_log.txt'.format(data_dir), 'a+')
                    f.write(str(generation_index) + '\n')
                    f.close()

        # max fitness without discriminator
        f = open('{}/max_fitness_no_discr.txt'.format(data_dir), 'a+')
        f.write(str(max(fitness)[0]) + '\n')
        f.close()
        # avg fitness without discriminator
        f = open('{}/avg_fitness_no_discr.txt'.format(data_dir), 'a+')
        f.write(str(fitness.mean()) + '\n')
        f.close()

        print('beta value: ', beta)
        fitness = (beta * discriminator_predictions) + fitness

        # plot fitness with discriminator
        writer.add_scalar('max fitness with discrm', max(fitness),
                          generation_index)
        writer.add_scalar('avg fitness with discrm', fitness.mean(),
                          generation_index)

        # max fitness with discriminator
        f = open('{}/max_fitness_discr.txt'.format(data_dir), 'a+')
        f.write(str(max(fitness)[0]) + '\n')
        f.close()
        # avg fitness with discriminator
        f = open('{}/avg_fitness_discr.txt'.format(data_dir), 'a+')
        f.write(str(fitness.mean()) + '\n')
        f.close()

        # plot properties
        writer.add_scalar('non standr max logp', max(logP_calculated),
                          generation_index)
        writer.add_scalar('non standr mean logp', logP_calculated.mean(),
                          generation_index)
        writer.add_scalar('non standr min sas', min(SAS_calculated),
                          generation_index)
        writer.add_scalar('non standr mean sas', SAS_calculated.mean(),
                          generation_index)
        writer.add_scalar('non standr min ringp', min(RingP_calculated),
                          generation_index)
        writer.add_scalar('non standr mean ringp', RingP_calculated.mean(),
                          generation_index)

        # max logP
        f = open('{}/max_logp.txt'.format(data_dir), 'a+')
        f.write(str(max(logP_calculated)) + '\n')
        f.close()
        # mean logP
        f = open('{}/avg_logp.txt'.format(data_dir), 'a+')
        f.write(str(logP_calculated.mean()) + '\n')
        f.close()
        # min SAS
        f = open('{}/min_SAS.txt'.format(data_dir), 'a+')
        f.write(str(min(SAS_calculated)) + '\n')
        f.close()
        # mean SAS
        f = open('{}/avg_SAS.txt'.format(data_dir), 'a+')
        f.write(str(SAS_calculated.mean()) + '\n')
        f.close()
        # min RingP
        f = open('{}/min_RingP.txt'.format(data_dir), 'a+')
        f.write(str(min(RingP_calculated)) + '\n')
        f.close()
        # mean RingP
        f = open('{}/avg_RingP.txt'.format(data_dir), 'a+')
        f.write(str(RingP_calculated.mean()) + '\n')
        f.close()

    return fitness, logP_calculated, SAS_calculated, RingP_calculated, discriminator_predictions
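
# For reference, the score the fitness function above implements reduces to
#     fitness(m) = logP_norm(m) - SAS_norm(m) - RingP_norm(m) + beta * D(m)
# where D(m) is the discriminator's prediction. A tiny self-contained
# illustration with made-up numbers (numpy only; all values hypothetical):
import numpy as np

logP_norm = np.array([[1.2], [0.4]])   # standardized logP per molecule
SAS_norm = np.array([[0.3], [0.9]])    # standardized synthetic accessibility
RingP_norm = np.array([[0.0], [0.5]])  # standardized ring penalty
disc_pred = np.array([[0.8], [0.1]])   # discriminator scores D(m)
beta = 10

combined = logP_norm - SAS_norm - RingP_norm + beta * disc_pred
print(combined)  # column vector: one combined fitness score per molecule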
def test_chunk():
    tags_dict = load_vocab("../data/tags.txt")
    seq = [10, 3, 6, 12, 12, 6]
    chunks = get_chunks(seq, tags_dict)
    return chunks
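
# A self-contained variant of the test above with an inline tag vocabulary,
# so the expected output is visible without ../data/tags.txt (the tag set
# here is illustrative):
def test_chunk_inline():
    tags_dict = {"O": 0, "B-PER": 1, "I-PER": 2, "B-LOC": 3}
    seq = [1, 2, 0, 3]
    # with the get_chunks sketch near the top of this file, this yields
    # [("PER", 0, 2), ("LOC", 3, 4)]
    return get_chunks(seq, tags_dict)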
def run_evaluate(self, sess, test, test_deps, vocab_words, vocab_tags,
                 print_test_results=False):
    """Evaluates performance on the test set."""
    idx_to_words = {}
    if print_test_results:
        idx_to_words = {idx: word for word, idx in vocab_words.items()}
    test_accs = []
    self.config.istrain = False  # switch batch normalization to inference mode
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, poss, chunks, labels, \
            btup_idx_list, btup_words_list, btup_depwords_list, \
            btup_deprels_list, btup_depwords_length_list, \
            upbt_idx_list, upbt_words_list, upbt_depwords_list, \
            upbt_deprels_list, upbt_depwords_length_list, \
            btup_formidx_list, upbt_formidx_list in minibatches(
                test, test_deps, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(
            sess, words, poss, chunks,
            btup_idx_list, btup_words_list, btup_depwords_list,
            btup_deprels_list, btup_depwords_length_list,
            upbt_idx_list, upbt_words_list, upbt_depwords_list,
            upbt_deprels_list, upbt_depwords_length_list,
            btup_formidx_list, upbt_formidx_list)
        if print_test_results:
            char_ids, word_ids = zip(*words)
        index = 0
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            test_accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
            if print_test_results:
                self.logger.info(" ".join(
                    [idx_to_words[w] for w in word_ids[index][:length]]))
                self.logger.info(" ".join(
                    self.get_aspect_polarity_pairs(lab_chunks)))
                self.logger.info(" ".join(
                    self.get_aspect_polarity_pairs(lab_pred_chunks)))
            index += 1
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    test_acc = np.mean(test_accs)
    return p, r, f1, test_acc
def run_evaluate(self, sess, test, vocab_aspect_tags, vocab_polarity_tags,
                 vocab_joint_tags, vocab_words, is_dev=True):
    """Evaluates performance on the test set."""
    self.config.istrain = False  # switch batch normalization to inference mode
    idx_to_words = {}
    if self.config.show_test_results:
        idx_to_words = {idx: word for word, idx in vocab_words.items()}
    losses = []
    aspect_test_accs, polarity_test_accs = [], []
    aspect_correct_preds, aspect_total_correct, aspect_total_preds = 0., 0., 0.
    polarity_correct_preds, polarity_total_correct, polarity_total_preds = 0., 0., 0.
    for words, poss, chunks, labels_aspect, labels_polarity, labels_joint in \
            minibatches_for_sequence(test, self.config.test_batch_size):
        if self.config.show_test_results:
            if type(words) == tuple:
                char_ids, word_ids = zip(*words)
            else:
                char_ids, word_ids = [], words
        aspect_lab_chunks = []
        aspect_lab_pred_chunks = []

        # evaluate aspect tags
        labels_pred, sequence_lengths = self.predict_batch(
            sess, words, poss, chunks, vocab_words, self.aspect_logits,
            self.aspect_transition_params, self.aspect_pred)
        for lab, lab_pred, length in zip(labels_aspect, labels_pred,
                                         sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            aspect_test_accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = get_chunks(lab, vocab_aspect_tags)
            aspect_lab_chunks.append(lab_chunks)
            lab_chunks = set(lab_chunks)
            lab_pred_chunks = get_chunks(lab_pred, vocab_aspect_tags)
            aspect_lab_pred_chunks.append(lab_pred_chunks)
            lab_pred_chunks = set(lab_pred_chunks)
            aspect_correct_preds += len(lab_chunks & lab_pred_chunks)
            aspect_total_preds += len(lab_pred_chunks)
            aspect_total_correct += len(lab_chunks)

        # evaluate polarity tags
        labels_pred, sequence_lengths = self.predict_batch(
            sess, words, poss, chunks, vocab_words, self.polarity_logits,
            self.polarity_transition_params, self.polarity_pred)
        index = 0
        for lab, lab_pred, length in zip(labels_polarity, labels_pred,
                                         sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            polarity_test_accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_polaity_chunks(
                lab, vocab_polarity_tags, aspect_lab_chunks[index]))
            lab_pred_chunks = set(get_polaity_chunks(
                lab_pred, vocab_polarity_tags, aspect_lab_pred_chunks[index]))
            polarity_correct_preds += len(lab_chunks & lab_pred_chunks)
            polarity_total_preds += len(lab_pred_chunks)
            polarity_total_correct += len(lab_chunks)
            if self.config.show_test_results:
                self.logger.info(" ".join(
                    [idx_to_words[w] for w in word_ids[index][:length]]))
                self.logger.info("T: " + " ".join(
                    self.get_aspect_polarity_pairs(
                        aspect_lab_chunks[index], lab_chunks)))
                self.logger.info("P: " + " ".join(
                    self.get_aspect_polarity_pairs(
                        aspect_lab_pred_chunks[index], lab_pred_chunks)))
            index += 1

        # get loss
        fd, sequence_lengths = self.get_feed_dict(
            words, poss, chunks, labels_aspect=labels_aspect,
            labels_polarity=labels_polarity, labels_joint=labels_joint,
            dropout=1.0, vocab_aspect_tags=vocab_aspect_tags)
        dev_loss = sess.run(self.loss, feed_dict=fd)
        losses.append(dev_loss)

    aspect_p, aspect_r, aspect_f1 = self.cacul_f1(
        aspect_correct_preds, aspect_total_preds, aspect_total_correct)
    aspect_test_acc = np.mean(aspect_test_accs)
    polarity_p, polarity_r, polarity_f1 = self.cacul_f1(
        polarity_correct_preds, polarity_total_preds, polarity_total_correct)
    polarity_test_acc = np.mean(polarity_test_accs)
    return (aspect_p, aspect_r, aspect_f1, aspect_test_acc,
            polarity_p, polarity_r, polarity_f1, polarity_test_acc,
            sum(losses) / len(losses))