Пример #1
0
 def evaluate(self, epoch):
     """Score the predictions saved for ``epoch`` against the ground truth.

     Only used for the dev dataset. Loads ``<result_dir>/<epoch>.json`` and
     ``self.gt_file_name`` through ``eval_file`` and pairs their entries
     positionally with ``zip`` (so surplus entries on either side are
     ignored). A predicted mention is counted as a match for every gold
     mention of the same entry whose ``mention``, ``offset`` and ``kb_id``
     are all equal.

     Args:
         epoch: Epoch identifier; ``str(epoch)`` is the stem of the result
             file name.

     Returns:
         Tuple ``(R, P, F1)``: recall (matches / gold mentions), precision
         (matches / predicted mentions) and their harmonic mean. A metric
         whose denominator is zero is reported as ``0.0`` instead of
         raising ``ZeroDivisionError``.
     """
     filename = self.result_dir + '/' + str(epoch) + '.json'
     print('evaluate ' + filename)
     result_file = eval_file(filename)
     gold_set = eval_file(self.gt_file_name)
     pred_set = result_file["submit_result"]

     match_cnt = 0
     gold_total = 0
     pred_total = 0
     for pred_entry, gold_entry in zip(pred_set, gold_set):
         pred_items = pred_entry["mention_result"]
         gold_items = gold_entry["lab_result"]
         gold_total += len(gold_items)
         pred_total += len(pred_items)
         # Every (prediction, gold) pair that agrees on all three fields
         # counts once.
         match_cnt += sum(
             1
             for pred_item in pred_items
             for gold_item in gold_items
             if pred_item["mention"] == gold_item["mention"]
             and pred_item["offset"] == gold_item["offset"]
             and pred_item["kb_id"] == gold_item["kb_id"]
         )

     # Guard each ratio: an empty gold set, an empty prediction set, or zero
     # matches would otherwise divide by zero.
     R = match_cnt / gold_total if gold_total else 0.0
     P = match_cnt / pred_total if pred_total else 0.0
     F1 = 2 * P * R / (P + R) if (P + R) else 0.0
     return R, P, F1
Пример #2
0
 def __init__(self,
              file_dir,
              name_to_com,
              id_to_com,
              max_length=160,
              threshold=0.5,
              result_dir=None):
     """Load an annotated file and convert its prepared fields to tensors.

     Args:
         file_dir: Path handed to ``eval_file``; also kept as
             ``self.gt_file_name``.
         name_to_com: Lookup forwarded to ``train_data_prepare`` and stored
             on the instance.
         id_to_com: Lookup stored on the instance.
         max_length: Length limit forwarded to ``train_data_prepare``.
         threshold: Stored on the instance; not used in this constructor.
         result_dir: Stored on the instance; not used in this constructor.
     """
     super().__init__()

     # Remember the constructor arguments for later use.
     self.gt_file_name = file_dir
     self.max_length = max_length
     self.name_to_com, self.id_to_com = name_to_com, id_to_com
     self.threshold, self.result_dir = threshold, result_dir

     parsed = eval_file(file_dir)
     prepared = train_data_prepare(parsed, name_to_com, TOKENIZER,
                                   max_length)
     self.raw_data, self.data = prepared

     # The sample count is the leading dimension of the 'ids' array
     # (presumably a NumPy array -- it must expose ``.shape``).
     self._len = self.data['ids'].shape[0]

     # Field name -> tensor dtype, converted in this order.
     tensor_specs = (
         ('ids', torch.long),
         ('mask_mat', torch.long),
         ('ent_mask', torch.long),
         ('kb_ids', torch.long),
         ('labels', torch.uint8),
         ('offsets', torch.long),
     )
     for field, dtype in tensor_specs:
         setattr(self, field, torch.tensor(self.data[field], dtype=dtype))
Пример #3
0
from utils.optim import get_optim, adjust_lr

# Restrict the CUDA devices visible to this process to those named by
# args.gpu.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
# NOTE: cuDNN benchmark mode auto-selects kernels, so runs may not be exactly
# reproducible even when a seed is set below.
torch.backends.cudnn.benchmark = True
# A seed of -1 means "do not seed"; any other value seeds NumPy and PyTorch.
if args.seed != -1:
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

# Each run writes into its own directory named after the start time
# (month-day-hour-minute).
curr_time = datetime.datetime.now()
result_dir = '../results/' + curr_time.strftime("%m-%d-%H-%M")
# makedirs also creates '../results' when it is missing (e.g. on a fresh
# checkout); it still raises FileExistsError if this run's directory already
# exists, so an earlier run's output is never silently reused.
os.makedirs(result_dir)
os.mkdir(result_dir + '/ckpts')

print('Load datasets!')
name_to_com = eval_file('../datasets/name_2_company.json')
id_to_com = eval_file('../datasets/id_2_company.json')
train_data = AnnoData('../datasets/train.json', name_to_com, id_to_com)
train_loader = DataLoader(train_data,
                          batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=4)
# The dev set additionally receives the decision threshold and the directory
# for this run's results.
val_data = AnnoData('../datasets/dev.json',
                    name_to_com,
                    id_to_com,
                    threshold=args.threshold,
                    result_dir=result_dir)
# Validation uses a fixed batch size and keeps the dataset order.
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=4)
print('datasets successfully loaded!\n')

print('config model and optim...')