def __iterations_compute(self, x_current, y_current, z_current, mode, coord):
    data_iterations = self.__select_items(coord)
    u = self.__select_u(coord)
    for i in data_iterations:
        # Rotation direction d and angle step f for this micro-rotation.
        d = self.__select_d(mode, y_current, z_current)
        f = self.__select_f(coord, d, i)
        # Fixed-point CORDIC update: the shift by i bits is implemented
        # as a division by 2**i, truncated back to an integer.
        x_next = int(x_current - u * ((d * y_current) / (2 ** i)))
        y_next = int(y_current + ((d * x_current) / (2 ** i)))
        z_next = int(z_current - f)
        x_current, y_current, z_current = x_next, y_next, z_next
    x = decoding(x_current, self.__resolution)
    y = decoding(y_current, self.__resolution)
    z = decoding(z_current, self.__resolution)
    return x, y, z
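# `decoding` above converts the fixed-point accumulators back to floats.
# A minimal sketch of such a helper, assuming two's-complement integers
# with `resolution` fractional bits (the real helper may additionally
# handle sign extension of the raw VHDL output):
def decoding(value, resolution):
    # Interpret `value` as fixed-point with `resolution` fractional bits.
    return value / (2 ** resolution)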
def evaluate(model, criterion, data_loader, file_path, mode):
    """
    mode eval:
        evaluate on the development set and compute P/R/F1, called during
        training; the intermediate prediction files are removed after scoring.
    mode predict:
        evaluate on the development / test set, then write predictions to
        predictions.json and predictions.json.zip under the args.data_path
        dir for later submission or evaluation.
    """
    example_all = []
    with open(file_path, "r", encoding="utf-8") as fp:
        for line in fp:
            example_all.append(json.loads(line))
    id2spo_path = os.path.join(os.path.dirname(file_path), "id2spo.json")
    with open(id2spo_path, 'r', encoding='utf8') as fp:
        id2spo = json.load(fp)

    model.eval()
    loss_all = 0
    eval_steps = 0
    formatted_outputs = []
    current_idx = 0
    for batch in tqdm(data_loader, total=len(data_loader)):
        eval_steps += 1
        input_ids, seq_len, tok_to_orig_start_index, tok_to_orig_end_index, labels = batch
        logits = model(input_ids=input_ids)
        # Mask out the special tokens (ids 0/1/2: pad, CLS, SEP).
        mask = (input_ids != 0).logical_and((input_ids != 1)).logical_and(
            (input_ids != 2))
        loss = criterion(logits, labels, mask)
        loss_all += loss.numpy().item()
        probs = F.sigmoid(logits)
        logits_batch = probs.numpy()
        seq_len_batch = seq_len.numpy()
        tok_to_orig_start_index_batch = tok_to_orig_start_index.numpy()
        tok_to_orig_end_index_batch = tok_to_orig_end_index.numpy()
        formatted_outputs.extend(
            decoding(example_all[current_idx:current_idx + len(logits)],
                     id2spo, logits_batch, seq_len_batch,
                     tok_to_orig_start_index_batch,
                     tok_to_orig_end_index_batch))
        current_idx = current_idx + len(logits)
    loss_avg = loss_all / eval_steps
    print("eval loss: %f" % (loss_avg))

    if mode == "predict":
        predict_file_path = os.path.join(args.data_path, 'predictions.json')
    else:
        predict_file_path = os.path.join(args.data_path, 'predict_eval.json')

    predict_zipfile_path = write_prediction_results(formatted_outputs,
                                                    predict_file_path)

    if mode == "eval":
        precision, recall, f1 = get_precision_recall_f1(
            file_path, predict_zipfile_path)
        os.system('rm {} {}'.format(predict_file_path, predict_zipfile_path))
        return precision, recall, f1
    elif mode != "predict":
        raise Exception("wrong mode for eval func")
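# A hypothetical call site between training epochs (the dev-set file name
# and loader are illustrative; only evaluate() itself is defined above):
# precision, recall, f1 = evaluate(
#     model, criterion, dev_data_loader,
#     os.path.join(args.data_path, "dev_data.json"), mode="eval")
# print("precision: %.4f  recall: %.4f  f1: %.4f" % (precision, recall, f1))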
def predict(args, ext_model, cls_model, tokenizer, ext_id2label, cls_id2label):
    ext_model.eval()
    cls_model.eval()

    while True:
        input_text = input("input text: \n")
        if not input_text:
            continue
        if input_text == "quit":
            break
        input_text = input_text.strip().replace(" ", "")

        # processing input text
        encoded_inputs = tokenizer(list(input_text),
                                   is_split_into_words=True,
                                   max_seq_len=args.ext_max_seq_len)
        input_ids = paddle.to_tensor([encoded_inputs["input_ids"]])
        token_type_ids = paddle.to_tensor([encoded_inputs["token_type_ids"]])

        # extract aspect and opinion words
        logits = ext_model(input_ids, token_type_ids=token_type_ids)
        predictions = logits.argmax(axis=2).numpy()[0]
        tag_seq = [ext_id2label[idx] for idx in predictions][1:-1]
        aps = decoding(input_text[:args.ext_max_seq_len - 2], tag_seq)

        # predict sentiment for aspect with cls_model
        results = []
        for ap in aps:
            aspect = ap[0]
            opinion_words = list(set(ap[1:]))
            aspect_text = concate_aspect_and_opinion(input_text, aspect,
                                                     opinion_words)
            encoded_inputs = tokenizer(aspect_text,
                                       text_pair=input_text,
                                       max_seq_len=args.cls_max_seq_len,
                                       return_length=True)
            input_ids = paddle.to_tensor([encoded_inputs["input_ids"]])
            token_type_ids = paddle.to_tensor(
                [encoded_inputs["token_type_ids"]])

            logits = cls_model(input_ids, token_type_ids=token_type_ids)
            prediction = logits.argmax(axis=1).numpy()[0]

            result = {
                "aspect": aspect,
                "opinions": opinion_words,
                "sentiment_polarity": cls_id2label[prediction]
            }
            results.append(result)

        format_print(results)
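# `concate_aspect_and_opinion` builds the first sentence of the pair fed
# to cls_model. A plausible sketch under the assumption that it joins each
# aspect with its opinion words in textual order (the real helper may
# differ):
def concate_aspect_and_opinion(text, aspect, opinion_words):
    aspect_text = ""
    for opinion_word in opinion_words:
        # Keep the aspect/opinion order they have in the original text.
        if text.find(aspect) <= text.find(opinion_word):
            aspect_text += aspect + opinion_word + "，"
        else:
            aspect_text += opinion_word + aspect + "，"
    return aspect_text[:-1]  # drop the trailing separator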
def predict_ext(self, args):
    ori_test_ds = load_dataset(read_test_file,
                               data_path=args.test_path,
                               lazy=False)
    trans_func = partial(convert_example_to_feature_ext,
                         tokenizer=self.tokenizer,
                         label2id=self.ext_label2id,
                         max_seq_len=args.ext_max_seq_len,
                         is_test=True)
    test_ds = copy.copy(ori_test_ds).map(trans_func, lazy=False)
    batch_list = [
        test_ds[idx:idx + args.batch_size]
        for idx in range(0, len(test_ds), args.batch_size)
    ]

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=self.tokenizer.pad_token_id, dtype="int64"),
        Pad(axis=0, pad_val=self.tokenizer.pad_token_type_id, dtype="int64"),
        Stack(dtype="int64")): fn(samples)

    results = []
    for bid, batch_data in enumerate(batch_list):
        input_ids, token_type_ids, seq_lens = batchify_fn(batch_data)
        self.ext_input_handles[0].copy_from_cpu(input_ids)
        self.ext_input_handles[1].copy_from_cpu(token_type_ids)
        self.ext_predictor.run()
        logits = self.ext_output_handle.copy_to_cpu()
        predictions = logits.argmax(axis=2)
        for eid, (seq_len,
                  prediction) in enumerate(zip(seq_lens, predictions)):
            idx = bid * args.batch_size + eid
            # Strip [CLS]/[SEP] before mapping label ids to tags.
            tag_seq = [
                self.ext_id2label[label_id]
                for label_id in prediction[:seq_len][1:-1]
            ]
            text = ori_test_ds[idx]["text"]
            aps = decoding(text[:args.ext_max_seq_len - 2], tag_seq)
            for aid, ap in enumerate(aps):
                aspect, opinions = ap[0], list(set(ap[1:]))
                aspect_text = self._concate_aspect_and_opinion(
                    text, aspect, opinions)
                results.append({
                    "id": str(idx) + "_" + str(aid),
                    "aspect": aspect,
                    "opinions": opinions,
                    "text": text,
                    "aspect_text": aspect_text
                })
    return results
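# The predictor and handles used above come from the Paddle Inference API.
# A minimal setup sketch that would live in the class __init__; the model
# file names are illustrative:
import paddle.inference as paddle_infer

def _create_ext_predictor(self, model_file="ext_model.pdmodel",
                          params_file="ext_model.pdiparams"):
    config = paddle_infer.Config(model_file, params_file)
    self.ext_predictor = paddle_infer.create_predictor(config)
    self.ext_input_handles = [
        self.ext_predictor.get_input_handle(name)
        for name in self.ext_predictor.get_input_names()
    ]
    self.ext_output_handle = self.ext_predictor.get_output_handle(
        self.ext_predictor.get_output_names()[0])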
def show_results(resolution=14):
    """
    Files are read in alphabetical order:
      1. Coordinate System
      2. Enable
      3. Mode
      4. X Python Values - computed with numpy
      5. X VHDL Values
      6. Y Python Values - computed with numpy
      7. Y VHDL Values
      8. Z Python Values - computed with numpy
      9. Z VHDL Values
    """
    real_values = create_files_to_simulate()
    (sin_python, cos_python, arctan_python, sinh_python, cosh_python,
     arctanh_python, axes_circular_python, axes_hyperbolic_python,
     axes_arctanh_python) = real_values
    data_output = read_files()
    coord, enable, mode = data_output[0], data_output[1], data_output[2]
    x, y, z = data_output[4], data_output[6], data_output[8]
    sin, cos, arctan = [], [], []
    sinh, cosh, arctanh = [], [], []
    for index in range(len(enable)):
        if enable[index] == 1:  # the module is enabled
            if coord[index] == 0:  # circular coordinate system
                if mode[index] == 0:  # rotation mode
                    cos.append(decoding(x[index], resolution))
                    sin.append(decoding(y[index], resolution))
                else:  # vectoring mode
                    arctan.append(rad_to_deg(decoding(z[index], resolution)))
            else:  # hyperbolic coordinate system
                if mode[index] == 0:  # rotation mode
                    cosh.append(decoding(x[index], resolution))
                    sinh.append(decoding(y[index], resolution))
                else:  # vectoring mode
                    arctanh.append(rad_to_deg(decoding(z[index], resolution)))
    plot_results(cos, cos_python, axes_circular_python, 'Cos')
    plot_results(sin, sin_python, axes_circular_python, 'Sin')
    plot_results(arctan, arctan_python, axes_circular_python, 'Arctan')
    plot_results(sinh, sinh_python, axes_hyperbolic_python, 'Sinh')
    plot_results(cosh, cosh_python, axes_hyperbolic_python, 'Cosh')
    plot_results(arctanh, arctanh_python, axes_arctanh_python, 'Arctanh')
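# `rad_to_deg` above is a plain unit conversion; a one-line sketch:
import math

def rad_to_deg(angle_rad):
    # Convert an angle from radians to degrees for plotting.
    return math.degrees(angle_rad)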
def evaluate(model, criterion, data_loader, test_loss, file_path, mode):
    """
    mode eval:
        evaluate on the development set and compute P/R/F1, called during
        training; the intermediate prediction files are removed after scoring.
    mode predict:
        evaluate on the development / test set, then write predictions to
        predictions.json and predictions.json.zip under the args.data_path
        dir for later submission or evaluation.
    """
    probs_all = None
    seq_len_all = None
    tok_to_orig_start_index_all = None
    tok_to_orig_end_index_all = None
    loss_all = 0
    eval_steps = 0
    for batch in tqdm(data_loader):
        eval_steps += 1
        input_ids, seq_len, tok_to_orig_start_index, tok_to_orig_end_index, labels = batch
        logits = model(input_ids=input_ids)
        # Mask out the special tokens (ids 0/1/2: pad, CLS, SEP).
        mask = (input_ids != 0).logical_and((input_ids != 1)).logical_and(
            (input_ids != 2))
        loss = criterion((logits, labels, mask))
        loss_all += test_loss(loss).result()
        # The model is assumed to emit probabilities directly here,
        # so no sigmoid is applied before decoding.
        probs = logits
        if probs_all is None:
            probs_all = probs.numpy()
            seq_len_all = seq_len.numpy()
            tok_to_orig_start_index_all = tok_to_orig_start_index.numpy()
            tok_to_orig_end_index_all = tok_to_orig_end_index.numpy()
        else:
            probs_all = np.append(probs_all, probs.numpy(), axis=0)
            seq_len_all = np.append(seq_len_all, seq_len.numpy(), axis=0)
            tok_to_orig_start_index_all = np.append(
                tok_to_orig_start_index_all,
                tok_to_orig_start_index.numpy(),
                axis=0)
            tok_to_orig_end_index_all = np.append(
                tok_to_orig_end_index_all,
                tok_to_orig_end_index.numpy(),
                axis=0)
    loss_avg = loss_all / eval_steps
    print("eval loss: %f" % (loss_avg))

    id2spo_path = os.path.join(os.path.dirname(file_path), "id2spo.json")
    with open(id2spo_path, 'r', encoding='utf8') as fp:
        id2spo = json.load(fp)
    formatted_outputs = decoding(file_path, id2spo, probs_all, seq_len_all,
                                 tok_to_orig_start_index_all,
                                 tok_to_orig_end_index_all)
    if mode == "predict":
        predict_file_path = os.path.join(args.data_path, 'predictions.json')
    else:
        predict_file_path = os.path.join(args.data_path, 'predict_eval.json')

    predict_zipfile_path = write_prediction_results(formatted_outputs,
                                                    predict_file_path)

    if mode == "eval":
        precision, recall, f1 = get_precision_recall_f1(
            file_path, predict_zipfile_path)
        os.system('rm {} {}'.format(predict_file_path, predict_zipfile_path))
        return precision, recall, f1
    elif mode != "predict":
        raise Exception("wrong mode for eval func")
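# `test_loss` is called as test_loss(loss).result(), i.e. a running-mean
# metric whose call updates state and returns the metric object. A minimal
# stand-in with that interface (the real object passed in may differ):
class MeanMetric:
    def __init__(self):
        self.total, self.count = 0.0, 0

    def __call__(self, value):
        # Accumulate one scalar loss value; return self so that
        # metric(value).result() chains as in evaluate() above.
        self.total += float(value)
        self.count += 1
        return self

    def result(self):
        return self.total / self.count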
def doccano2SA(doccano_file, save_ext_dir, save_cls_dir, splits=[0.8, 0.9],
               is_shuffle=True):
    """
    @Description: Convert a doccano file to the data format that this
        application expects as input.
    @Param doccano_file: The annotated file exported from the doccano
        labeling platform.
    @Param save_ext_dir: The directory where the ext data will be saved.
    @Param save_cls_dir: The directory where the cls data will be saved.
    @Param splits: Whether to split the doccano file into train/dev/test.
        Note: only [] or len(splits) == 2 is accepted.
    @Param is_shuffle: Whether to shuffle the data.
    """
    if not os.path.exists(doccano_file):
        raise ValueError("Please input the correct path of the doccano file.")
    if not os.path.exists(save_ext_dir):
        os.makedirs(save_ext_dir)
    if not os.path.exists(save_cls_dir):
        os.makedirs(save_cls_dir)
    if len(splits) != 0 and len(splits) != 2:
        raise ValueError("Only [] or len(splits) == 2 is accepted for splits.")
    if splits and (splits[0] >= splits[1] or splits[0] >= 1.0
                   or splits[1] >= 1.0 or splits[0] <= 0.
                   or splits[1] <= 0.):
        raise ValueError(
            "Please set correct splits: each element should be in (0, 1), "
            "and splits[1] > splits[0].")

    def label_ext_with_label_term(ext_label, start, end, tag):
        if tag == "Opinion":
            b_tag = "B-Opinion"
            i_tag = "I-Opinion"
        else:
            b_tag = "B-Aspect"
            i_tag = "I-Aspect"
        ext_label[start] = b_tag
        for i in range(start + 1, end):
            ext_label[i] = i_tag

    ext_examples, cls_examples = [], []
    with open(doccano_file, "r", encoding="utf-8") as f:
        raw_examples = f.readlines()

    # start to label the ext and cls data
    for line in raw_examples:
        items = json.loads(line)
        text, label_terms = items["data"], items["label"]
        # label ext data with label_terms
        ext_label = ["O"] * len(text)
        aspect_mapper = {}
        for label_term in label_terms:
            start, end, tag = label_term
            label_ext_with_label_term(ext_label, start, end, tag)
            if tag == "Pos-Aspect":
                aspect_mapper[text[start:end]] = "1"
            elif tag == "Neg-Aspect":
                aspect_mapper[text[start:end]] = "0"
        ext_examples.append((text, " ".join(ext_label)))
        # label cls data
        aps = decoding(text, ext_label)
        for ap in aps:
            aspect, opinions = ap[0], list(set(ap[1:]))
            if aspect not in aspect_mapper:
                continue
            aspect_text = concate_aspect_and_opinion(text, aspect, opinions)
            cls_examples.append((aspect_mapper[aspect], aspect_text, text))

    # index for saving data
    ext_idx = np.arange(len(ext_examples))
    cls_idx = np.arange(len(cls_examples))
    if is_shuffle:
        ext_idx = np.random.permutation(ext_idx)
        cls_idx = np.random.permutation(cls_idx)

    if len(splits) == 0:
        # save ext data
        save_ext_path = os.path.join(save_ext_dir, "doccano.txt")
        save_examples(ext_examples, save_ext_path, ext_idx)
        print(f"\next: save data to {save_ext_path}.")
        # save cls data
        save_cls_path = os.path.join(save_cls_dir, "doccano.txt")
        save_examples(cls_examples, save_cls_path, cls_idx)
        print(f"\ncls: save data to {save_cls_path}.")
    else:
        # save ext data
        eth1, eth2 = int(len(ext_examples) * splits[0]), int(
            len(ext_examples) * splits[1])
        save_ext_train_path = os.path.join(save_ext_dir, "train.txt")
        save_ext_dev_path = os.path.join(save_ext_dir, "dev.txt")
        save_ext_test_path = os.path.join(save_ext_dir, "test.txt")
        save_examples(ext_examples, save_ext_train_path, ext_idx[:eth1])
        save_examples(ext_examples, save_ext_dev_path, ext_idx[eth1:eth2])
        save_examples(ext_examples, save_ext_test_path, ext_idx[eth2:])
        print(f"\next: save train data to {save_ext_train_path}.")
        print(f"ext: save dev data to {save_ext_dev_path}.")
        print(f"ext: save test data to {save_ext_test_path}.")
        # save cls data
        cth1, cth2 = int(len(cls_examples) * splits[0]), int(
            len(cls_examples) * splits[1])
        save_cls_train_path = os.path.join(save_cls_dir, "train.txt")
        save_cls_dev_path = os.path.join(save_cls_dir, "dev.txt")
        save_cls_test_path = os.path.join(save_cls_dir, "test.txt")
        save_examples(cls_examples, save_cls_train_path, cls_idx[:cth1])
        save_examples(cls_examples, save_cls_dev_path, cls_idx[cth1:cth2])
        save_examples(cls_examples, save_cls_test_path, cls_idx[cth2:])
        print(f"\ncls: save train data to {save_cls_train_path}.")
        print(f"cls: save dev data to {save_cls_dev_path}.")
        print(f"cls: save test data to {save_cls_test_path}.")

    # save ext and cls dicts
    ext_dict_path = os.path.join(save_ext_dir, "label.dict")
    cls_dict_path = os.path.join(save_cls_dir, "label.dict")
    save_dict(ext_dict_path, "ext")
    save_dict(cls_dict_path, "cls")
    print(f"\next: save dict to {ext_dict_path}.")
    print(f"cls: save dict to {cls_dict_path}.")
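# The loop above expects each doccano line to be a JSON record with the
# raw text under "data" and [start, end, tag] spans under "label".
# An illustrative (made-up) record and invocation:
# {"data": "服务很好，味道一般", "label": [[0, 2, "Pos-Aspect"], [2, 4, "Opinion"]]}
#
# doccano2SA("doccano.jsonl", "./ext_data", "./cls_data",
#            splits=[0.8, 0.9], is_shuffle=True)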
def predict_ext(args):
    # load dict
    model_name = "skep_ernie_1.0_large_ch"
    ext_label2id, ext_id2label = load_dict(args.ext_label_path)
    tokenizer = SkepTokenizer.from_pretrained(model_name)
    ori_test_ds = load_dataset(read_test_file,
                               data_path=args.test_path,
                               lazy=False)
    trans_func = partial(convert_example_to_feature_ext,
                         tokenizer=tokenizer,
                         label2id=ext_label2id,
                         max_seq_len=args.ext_max_seq_len,
                         is_test=True)
    test_ds = copy.copy(ori_test_ds).map(trans_func, lazy=False)

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"),
        Stack(dtype="int64"),
    ): fn(samples)

    test_batch_sampler = paddle.io.BatchSampler(test_ds,
                                                batch_size=args.batch_size,
                                                shuffle=False)
    test_loader = paddle.io.DataLoader(test_ds,
                                       batch_sampler=test_batch_sampler,
                                       collate_fn=batchify_fn)
    print("test data loaded.")

    # load ext model
    ext_state_dict = paddle.load(args.ext_model_path)
    ext_model = SkepForTokenClassification.from_pretrained(
        model_name, num_classes=len(ext_label2id))
    ext_model.load_dict(ext_state_dict)
    print("extraction model loaded.")

    ext_model.eval()
    results = []
    for bid, batch_data in enumerate(test_loader):
        input_ids, token_type_ids, seq_lens = batch_data
        logits = ext_model(input_ids, token_type_ids=token_type_ids)
        predictions = logits.argmax(axis=2).numpy()
        for eid, (seq_len,
                  prediction) in enumerate(zip(seq_lens, predictions)):
            idx = bid * args.batch_size + eid
            # Strip [CLS]/[SEP] before mapping label ids to tags.
            tag_seq = [
                ext_id2label[label_id]
                for label_id in prediction[:seq_len][1:-1]
            ]
            text = ori_test_ds[idx]["text"]
            aps = decoding(text[:args.ext_max_seq_len - 2], tag_seq)
            for aid, ap in enumerate(aps):
                aspect, opinions = ap[0], list(set(ap[1:]))
                aspect_text = concate_aspect_and_opinion(
                    text, aspect, opinions)
                results.append({
                    "id": str(idx) + "_" + str(aid),
                    "aspect": aspect,
                    "opinions": opinions,
                    "text": text,
                    "aspect_text": aspect_text
                })
    return results
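# The `decoding` helper used above turns a BIO tag sequence into groups of
# the form [aspect, opinion, opinion, ...]. A hedged sketch that attaches
# each opinion span to the most recent aspect (the real helper may group
# spans differently):
def decoding(text, tag_seq):
    # First collect (start, end, label) spans from the BIO tags.
    spans, start = [], None
    for i, tag in enumerate(tag_seq):
        if tag.startswith("B-"):
            if start is not None:
                spans.append((start, i, tag_seq[start][2:]))
            start = i
        elif tag == "O" and start is not None:
            spans.append((start, i, tag_seq[start][2:]))
            start = None
    if start is not None:
        spans.append((start, len(tag_seq), tag_seq[start][2:]))

    # Then group opinion spans under the latest aspect span.
    aps = []
    for s, e, label in spans:
        word = text[s:e]
        if label == "Aspect":
            aps.append([word])
        elif aps:  # an Opinion span: attach it to the latest aspect
            aps[-1].append(word)
    return aps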
def evaluate(model, criterion, data_loader, file_path, mode, logger):
    """
    mode eval:
        evaluate on the development set and compute P/R/F1, called during
        training; the intermediate prediction files are removed after scoring.
    mode predict:
        evaluate on the development / test set, then write predictions to
        predictions.json and predictions.json.zip under the args.data_path
        dir for later submission or evaluation.
    """
    model.eval()
    probs_all = None
    seq_len_all = None
    tok_to_orig_start_index_all = None
    tok_to_orig_end_index_all = None
    loss_all = 0
    eval_steps = 0
    logger.info(
        "\n----------------------------------IN Evaluate func-----------------------------------\n"
    )
    for batch in tqdm(data_loader, total=len(data_loader)):
        eval_steps += 1
        input_ids, seq_len, tok_to_orig_start_index, tok_to_orig_end_index, labels = batch
        if args.device == 'cuda':
            input_ids = input_ids.cuda()
            labels = labels.cuda()
        logits = model(input_ids=input_ids)
        # Mask out the special tokens (ids 0/1/2: pad, CLS, SEP).
        mask = (input_ids != 0) & (input_ids != 1) & (input_ids != 2)
        loss = criterion(logits, labels, mask)
        loss_all += loss.detach().cpu().numpy().item()
        probs = torch.sigmoid(logits).cpu()
        if probs_all is None:
            probs_all = probs.numpy()
            seq_len_all = seq_len.numpy()
            tok_to_orig_start_index_all = tok_to_orig_start_index.numpy()
            tok_to_orig_end_index_all = tok_to_orig_end_index.numpy()
        else:
            probs_all = np.append(probs_all, probs.numpy(), axis=0)
            seq_len_all = np.append(seq_len_all, seq_len.numpy(), axis=0)
            tok_to_orig_start_index_all = np.append(
                tok_to_orig_start_index_all,
                tok_to_orig_start_index.numpy(),
                axis=0)
            tok_to_orig_end_index_all = np.append(
                tok_to_orig_end_index_all,
                tok_to_orig_end_index.numpy(),
                axis=0)
    loss_avg = loss_all / eval_steps
    logger.info("eval loss: %f" % (loss_avg))

    id2spo_path = os.path.join(os.path.dirname(file_path), "id2spo.json")
    with open(id2spo_path, 'r', encoding='utf8') as fp:
        id2spo = json.load(fp)
    formatted_outputs = decoding(file_path, id2spo, probs_all, seq_len_all,
                                 tok_to_orig_start_index_all,
                                 tok_to_orig_end_index_all)
    if mode == "predict":
        predict_file_path = os.path.join(args.data_path, 'predictions.json')
    else:
        predict_file_path = os.path.join(args.data_path, 'predict_eval.json')

    predict_zipfile_path = write_prediction_results(formatted_outputs,
                                                    predict_file_path)

    if mode == "eval":
        precision, recall, f1 = get_precision_recall_f1(
            file_path, predict_zipfile_path)
        os.system('rm {} {}'.format(predict_file_path, predict_zipfile_path))
        return precision, recall, f1
    elif mode != "predict":
        logger.debug("wrong mode for eval func")
        raise Exception("wrong mode for eval func")
    logger.info("Finish evaluating.")
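# The criterion above is called as criterion(logits, labels, mask). A
# sketch of one plausible implementation: token-level binary cross-entropy
# with the special-token positions masked out (the actual loss may weight
# terms differently):
import torch
import torch.nn as nn

class MaskedBCELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss(reduction="none")

    def forward(self, logits, labels, mask):
        loss = self.bce(logits, labels.float())  # (batch, seq, classes)
        mask = mask.unsqueeze(-1).float()        # (batch, seq, 1)
        # Average only over unmasked token-class elements.
        return (loss * mask).sum() / (mask.sum() * logits.shape[-1])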