# NOTE(review): collapsed fragment — the enclosing collate function's `def`
# line and the construction of `words_ts`/`sent_ts`/`mask_index_ts` sit above
# this chunk and are not visible here.
        # Right-pad each mask-index tensor to a fixed width of 77 so the rows
        # can be stacked.  (assumes no sample carries more than 77 masked
        # positions — TODO confirm against the dataset code)
        F.pad(t, pad=(0, 77 - t.size(0))).view(1, -1) for t in mask_index_ts
    ]
    # Stack the per-sample padded rows into (batch, length) tensors.
    words_t = torch.cat(words_ts, dim=0)
    sent_t = torch.cat(sent_ts, dim=0)
    mask_index_t = torch.cat(mask_index_ts, dim=0)
    # Original (unpadded) counts, kept as long tensors for downstream masking.
    words_num_t = torch.tensor(words_num, dtype=torch.long)
    mask_num_t = torch.tensor(mask_num, dtype=torch.long)
    return words_t, words_num_t, sent_t, mask_index_t, mask_num_t


if __name__ == "__main__":
    # Smoke test: load the char vocabulary and build the test dataset.
    char2idx, idx2char = get_chars('../corpus/chars.lst')
    data = TestData('../data/demo_test.txt', char2idx)
    # words_idx, sent_idx, mask_index, mask_label_idx, label_idx = data[0]
    # print(words_idx)
    # print(sent_idx)
    # print(mask_index)
    # print(mask_label_idx)
    # print(label_idx)
    # words = [idx2char[idx] for idx in words_idx]
    # print(words)
    # mask_label = [idx2char[idx] for idx in mask_label_idx]
    # print(mask_label)
    # print(len(words_idx), len(sent_idx), len(mask_index), len(mask_label_idx))
    # print('size of data = ', len(data))
    # maxlen = 0
# Preview the character regions extracted from each colour channel of an
# input image.  `utils.get_chars` receives its own copy of the image per
# channel so the extractions cannot interfere with one another.
import cv2

import utils

image = cv2.imread('1.png', cv2.IMREAD_COLOR)
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail loudly instead of crashing inside utils.get_chars.
if image is None:
    raise FileNotFoundError("could not read image file '1.png'")

blue = utils.get_chars(image.copy(), utils.BLUE)
green = utils.get_chars(image.copy(), utils.GREEN)
red = utils.get_chars(image.copy(), utils.RED)

# Bug fix: every window was titled 'Image Gray' even though the previews show
# the blue/green/red channel extractions — label each one correctly.
for title, channel in (('Image Blue', blue),
                       ('Image Green', green),
                       ('Image Red', red)):
    cv2.imshow(title, channel)
    cv2.waitKey(0)
cv2.destroyAllWindows()
# NOTE(review): collapsed fragment — this chunk opens mid-call (the
# `mask_index_t)` below closes a forward/loss call whose start is above it)
# and ends mid-expression inside a torch.load() checkpoint-name format.
                           mask_index_t)
    # Masked-LM loss, normalised by the total number of masked positions in
    # the batch.  (`mask_acc` is returned as-is by mask_loss_fn — presumably
    # already normalised; verify against its definition.)
    mask_loss, mask_acc = mask_loss_fn(mask_out, mask_label_t, mask_num_t)
    mask_loss = mask_loss / mask_num_t.float().sum()
    # Sentence-level classification loss; total loss is the plain sum.
    sent_cls_loss = sent_cls_loss_fn(sent_cls_out, label_t)
    total_loss = mask_loss + sent_cls_loss
    # Accuracy = fraction of samples whose argmax class matches the label.
    sent_cls_pred = sent_cls_out.argmax(dim=-1)
    sent_cls_acc = sent_cls_pred.eq(label_t).float().sum() / words_t.size(
        0)
    return total_loss.item(), mask_loss.item(), mask_acc.item(
    ), sent_cls_loss.item(), sent_cls_acc.item()


# --- module-level setup: vocabulary, datasets and the BERT model ---
char2idx, idx2char = get_chars(cfg.chars_path)
train_data = TrainData(cfg.train_path)
valid_data = ValidData(cfg.valid_path)
net = Bert(cfg).to(device)
print(net)
show_model_size(net)
# Try to resume from a previously saved checkpoint; the matching `except`
# clause lies beyond this chunk.
try:
    model_path = os.path.abspath(cfg.load_model_path)
    net.load_state_dict(
        torch.load(
            os.path.join(
                model_path,
                '%s_%.8f_lr_%d_embeddim_%.2f_dropout_%d_layers.pt' %
                (net.name, cfg.lr, cfg.embed_dim, cfg.dropout,
# NOTE(review): collapsed fragment — the config-printing loop that binds
# `key`/`val` starts above this chunk, and the trailing torch.load() call is
# cut off mid-expression at the end.
    print('{} = {}'.format(key, val))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def test(net, words_t, words_num_t, sent_t):
    """Run the classifier net on one batch in eval mode, without gradients.

    Returns the raw output logits `para_cls_out`.
    """
    net.eval()
    with torch.no_grad():
        para_cls_out = net(words_t, words_num_t, sent_t)
        # NOTE(review): `para_cls_pred` is computed but never used — the raw
        # logits are returned instead.  Confirm whether the argmax prediction
        # was meant to be returned.
        para_cls_pred = para_cls_out.argmax(dim=-1)
        return para_cls_out


# --- module-level setup: vocabularies, test data and the model stack ---
char2idx, idx2char = get_chars(cfg.chars_path)
label2idx, idx2label = get_chars(cfg.para_cls_labels_path)
test_data = TestData(cfg.test_path, char2idx)
bert_net = Bert(cfg).to(device)
para_cls_net = Para_cls_model(cfg, bert_net).to(device)
print(para_cls_net)
show_model_size(para_cls_net)
# Load a pre-trained classifier checkpoint; the matching `except` clause lies
# beyond this chunk.
try:
    model_path = os.path.abspath(cfg.para_cls_load_model_path)
    para_cls_net.load_state_dict(
        torch.load(
            os.path.join(
return loss.item() def valid(net, words_t, label_t, loss_fn): net.eval() with torch.no_grad(): batch_size = words_t.size(0) out = net(words_t) loss = loss_fn(out, label_t) pred = out.argmax(dim=1) acc = pred.eq(label_t).float().sum(dim=-1) / batch_size return loss.item(), acc.item() char2idx, idx2char = get_chars(os.path.join(BASE_PATH, config.chars_path)) label2idx, idx2label = get_labels(os.path.join(BASE_PATH, config.labels_path)) train_data = TrainData(os.path.join(BASE_PATH, config.train_path), char2idx, label2idx) valid_data = ValidData(os.path.join(BASE_PATH, config.valid_path), char2idx, label2idx) train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True, collate_fn=collate_fn) valid_loader = DataLoader(valid_data, batch_size=config.batch_size, shuffle=False, collate_fn=collate_fn) net = FFN(config).to(device) print(net) show_model_size(net) try: model_path = os.path.join(BASE_PATH, config.load_model_path)) net.load_state_dict(torch.load(os.path.join(model_path, '%s_%.8f_lr_%d_hidsize.pt' % (net.name, config.lr, config.hidden_size)))) opt = optim.Adam(net.parameters(), lr=config.cur_lr) print('load pre-train model succeed.')
# NOTE(review): collapsed fragment — the enclosing collate function's `def`
# line and the construction of `words_ts`/`words_num`/`sent_idx` sit above
# this chunk.
    sent_ts = [torch.tensor(idx, dtype=torch.long) for idx in sent_idx]
    # Right-pad every sample to a fixed length of 512 tokens, then reshape to
    # a (1, 512) row so the rows can be concatenated into a batch.
    words_ts = [F.pad(t, pad=(0, 512-t.size(0))).view(1, -1) for t in words_ts]
    sent_ts = [F.pad(t, pad=(0, 512-t.size(0))).view(1, -1) for t in sent_ts]
    words_t = torch.cat(words_ts, dim=0)
    sent_t = torch.cat(sent_ts, dim=0)
    # Original (unpadded) sequence lengths as a long tensor.
    words_num_t = torch.tensor(words_num, dtype=torch.long)
    return words_t, words_num_t, sent_t


if __name__ == "__main__":
    # Smoke test: load vocabularies and build the test dataset.
    char2idx, idx2char = get_chars('../../corpus/chars.lst')
    label2idx, idx2label = get_chars('../../corpus/labels.lst')
    data = TestData('../data/demo_test.txt', char2idx)
    # words_idx, sent_idx, mask_index, mask_label_idx, label_idx = data[0]
    # print(words_idx)
    # print(sent_idx)
    # print(mask_index)
    # print(mask_label_idx)
    # print(label_idx)
    # words = [idx2char[idx] for idx in words_idx]
    # print(words)
    # mask_label = [idx2char[idx] for idx in mask_label_idx]
    # print(mask_label)
    # print(len(words_idx), len(sent_idx), len(mask_index), len(mask_label_idx))
    # print('size of data = ', len(data))
#return words_idx, sent_idx, mask_index, mask_label_idx, label_idx res = ' '.join([str(idx) for idx in words_idx]) + '\t' res += ' '.join([str(idx) for idx in sent_idx]) + '\t' res += ' '.join([str(idx) for idx in mask_index]) + '\t' res += ' '.join([str(idx) for idx in mask_label_idx]) + '\t' res += label + '\n' return res def __len__(self): return len(self.data) def gen_bert_data(self, output_file): with open(output_file, 'w', encoding='utf-8') as f: for i in range(len(self.data)): f.write(self._gen_item(i)) if __name__ == "__main__": if len(sys.argv) != 4: print('Using: python %s chars_vocab_path raw_text_path output_idx_data_path') sys.exit(1) char2idx, idx2char = get_chars(sys.argv[1]) data = Dataset(sys.argv[2], char2idx) data.gen_bert_data(sys.argv[3]) #print(data._gen_item(0)) pass