Example #1
0
    def __init__(self, opt):
        """Build train/test iterators and the model from the run options."""
        self.opt = opt

        # Read the dataset together with its pretrained word embeddings.
        dataset = ABSADatesetReader(dataset=opt.dataset,
                                    embed_dim=opt.embed_dim)

        # Bucketed batching: shuffle only the training split.
        self.train_data_loader = BucketIterator(
            data=dataset.train_data, batch_size=opt.batch_size, shuffle=True)
        self.test_data_loader = BucketIterator(
            data=dataset.test_data, batch_size=opt.batch_size, shuffle=False)

        # Instantiate the configured model class on the target device.
        self.model = opt.model_class(dataset.embedding_matrix,
                                     opt).to(opt.device)
        self._print_args()
        self.global_f1 = 0.

        if torch.cuda.is_available():
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))
Example #2
0
    def __init__(self, opt):
        """Load pickled datasets, build train/test iterators, and set up the model.

        SECURITY NOTE(review): ``pickle.load`` executes arbitrary code from the
        file being read — only load pickle files produced by this project.
        """
        self.opt = opt

        # Context managers close the pickle file handles after loading
        # (the originals were opened and never closed).
        with open(opt.dataset + '_datas.pkl', 'rb') as f:
            absa_dataset = pickle.load(f)
        with open('other' + '_datas.pkl', 'rb') as f:
            absa_other = pickle.load(f)
        # Edge vocabulary size feeds the model's edge-embedding layer.
        opt.edge_size = len(absa_dataset.edgevocab)

        self.train_data_loader = BucketIterator(data=absa_dataset.train_data,
                                                batch_size=opt.batch_size,
                                                shuffle=True)
        self.test_data_loader = BucketIterator(data=absa_dataset.test_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False)

        self.model = opt.model_class(absa_dataset.embedding_matrix,
                                     opt).to(opt.device)
        self._print_args()
        self.global_f1 = 0.

        if torch.cuda.is_available():
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))
Example #3
0
    def _train(self, optimizer, fold_i):
        """Train the model on one cross-validation fold with early stopping.

        Args:
            optimizer: torch optimizer stepping ``self.model``'s parameters.
            fold_i: index of the fold whose train/test split is used.

        Returns:
            Tuple of the best test (precision, recall, f1) seen for pairs,
            emotions, and causes across all logged evaluation steps.
        """
        max_test_precision = 0
        max_test_recall = 0
        max_test_f1 = 0

        max_test_precision_e = 0
        max_test_recall_e = 0
        max_test_f1_e = 0
        max_test_precision_c = 0
        max_test_recall_c = 0
        max_test_f1_c = 0
        global_step = 0
        continue_not_increase = 0
        # BUG FIX: the original referenced a bare `opt`, undefined in this
        # method's scope — everywhere else the method uses `self.opt`.
        train_data_loader = BucketIterator(
            data=self.dataset.train_data[fold_i],
            batch_size=self.opt.batch_size,
            shuffle=True)
        test_data_loader = BucketIterator(data=self.dataset.test_data[fold_i],
                                          batch_size=self.opt.batch_size,
                                          shuffle=False)
        for epoch in range(self.opt.num_epoch):
            print('>' * 100)
            print('epoch: ', epoch)
            increase_flag = False
            for i_batch, sample_batched in enumerate(train_data_loader):
                global_step += 1

                # switch model to training mode, clear gradient accumulators
                self.model.train()
                optimizer.zero_grad()

                inputs = [
                    sample_batched[col].to(self.opt.device)
                    for col in self.opt.inputs_cols
                ]
                targets = [
                    sample_batched[col].to(self.opt.device)
                    for col in self.opt.targets_cols
                ]

                outputs = self.model(inputs)
                loss = self.model.calc_loss(outputs, targets)
                loss.backward()
                optimizer.step()
                # Evaluate (and possibly checkpoint) every `log_step` batches.
                if global_step % self.opt.log_step == 0:
                    output_emotion, output_cause, output_pair = outputs
                    target_emotion, target_cause, target_pair = targets
                    # Threshold 0.3 binarizes the predicted pair scores;
                    # nonzero() yields the surviving (row, col) index pairs.
                    output_pairs = torch.nonzero(
                        output_pair > 0.3).cpu().numpy().tolist()
                    target_pairs = torch.nonzero(
                        target_pair > 0.3).cpu().numpy().tolist()
                    n_TP = 0
                    for i_pair in output_pairs:
                        if i_pair in target_pairs:
                            n_TP += 1
                    n_FP = (len(output_pairs) - n_TP)
                    n_FN = (len(target_pairs) - n_TP)
                    # 1e-5 smoothing guards against division by zero.
                    precision = float(n_TP) / float(n_TP + n_FP + 1e-5)
                    recall = float(n_TP) / float(n_TP + n_FN + 1e-5)
                    f1 = 2 * precision * recall / (precision + recall + 1e-5)

                    test_precision, test_recall, test_f1, test_precision_e, test_recall_e, test_f1_e, test_precision_c, test_recall_c, test_f1_c = self._evaluate(
                        test_data_loader)

                    # Track the best pair F1; checkpoint when it improves.
                    if test_f1 > max_test_f1:
                        increase_flag = True
                        max_test_precision = test_precision
                        max_test_recall = test_recall
                        max_test_f1 = test_f1
                        if self.opt.save:
                            # exist_ok avoids the check-then-create race.
                            os.makedirs('state_dict', exist_ok=True)
                            torch.save(
                                self.model.state_dict(),
                                'state_dict/' + self.opt.model_name + '_' +
                                str(fold_i) + self.opt.dataset + '.pkl')
                            print('>> model saved.')
                    print(
                        'loss: {:.4f}, train_f1: {:.4f}, test_precision: {:.4f}, test_recall: {:.4f}, test_f1: {:.4f}'
                        .format(loss.item(), f1, test_precision, test_recall,
                                test_f1))
                    # Emotion-extraction metrics tracked independently.
                    if test_f1_e > max_test_f1_e:
                        increase_flag = True
                        max_test_precision_e = test_precision_e
                        max_test_recall_e = test_recall_e
                        max_test_f1_e = test_f1_e
                    print(
                        'test_precision_e: {:.4f}, test_recall_e: {:.4f}, test_f1_e: {:.4f}'
                        .format(test_precision_e, test_recall_e, test_f1_e))
                    # Cause-extraction metrics tracked independently.
                    if test_f1_c > max_test_f1_c:
                        increase_flag = True
                        max_test_precision_c = test_precision_c
                        max_test_recall_c = test_recall_c
                        max_test_f1_c = test_f1_c
                    print(
                        'test_precision_c: {:.4f}, test_recall_c: {:.4f}, test_f1_c: {:.4f}'
                        .format(test_precision_c, test_recall_c, test_f1_c))

            # Early stop after `patience` epochs without any metric improving.
            if not increase_flag:
                continue_not_increase += 1
                if continue_not_increase >= self.opt.patience:
                    print('>> early stop.')
                    break
            else:
                continue_not_increase = 0
        return max_test_precision, max_test_recall, max_test_f1, max_test_precision_e, max_test_recall_e, max_test_f1_e, max_test_precision_c, max_test_recall_c, max_test_f1_c
Example #4
0
    def __init__(self, opt):
        """Build data loaders, the model, and metric bookkeeping for a run."""
        self.opt = opt

        bert_models = ('bote', 'bote_v0_ablation', 'bote_v1_ablation',
                       'bote_v2_ablation', 'bote_v3_ablation', 'bote_v4')
        if opt.model in bert_models:
            # BERT pipeline: tokenizer comes from the pretrained model and no
            # pretrained embedding matrix is needed.
            absa_data_reader = ABSADataReaderBERT(data_dir=opt.data_dir)
            tokenizer = BertTokenizer(opt.bert_model, opt.case, opt.spacy_lang,
                                      opt.lang)
            embedding_matrix = []
            iterator_cls = BucketIteratorBert
        else:
            # GloVe pipeline: build tokenizer and embedding matrix from data.
            absa_data_reader = ABSADataReader(data_dir=opt.data_dir)
            tokenizer = build_tokenizer(data_dir=opt.data_dir)
            embedding_matrix = build_embedding_matrix(opt.data_dir,
                                                      tokenizer.word2idx,
                                                      opt.embed_dim,
                                                      opt.dataset,
                                                      opt.glove_fname)
            iterator_cls = BucketIterator

        # Only the training split is shuffled.
        self.train_data_loader = iterator_cls(
            data=absa_data_reader.get_train(tokenizer),
            batch_size=opt.batch_size,
            shuffle=True)
        self.dev_data_loader = iterator_cls(
            data=absa_data_reader.get_dev(tokenizer),
            batch_size=opt.batch_size,
            shuffle=False)
        self.test_data_loader = iterator_cls(
            data=absa_data_reader.get_test(tokenizer),
            batch_size=opt.batch_size,
            shuffle=False)

        self.idx2tag = absa_data_reader.reverse_tag_map
        self.idx2polarity = absa_data_reader.reverse_polarity_map
        self.model = opt.model_class(embedding_matrix, opt, self.idx2tag,
                                     self.idx2polarity).to(opt.device)

        # Per-step metric history: epoch/step plus each metric for both splits.
        metric_names = ('ap_precision', 'ap_recall', 'ap_f1',
                        'op_precision', 'op_recall', 'op_f1',
                        'triplet_precision', 'triplet_recall', 'triplet_f1')
        self.history_metrics = {'epoch': [], 'step': []}
        for split in ('train', 'dev'):
            for metric in metric_names:
                self.history_metrics[split + '_' + metric] = []

        # Final precision/recall/f1 accumulators per extraction task.
        self.results = {
            task: {'precision': [], 'recall': [], 'f1': []}
            for task in ('aspect_extraction', 'opinion_extraction',
                         'triplet_extraction')
        }

        self._print_args()

        if torch.cuda.is_available():
            print('>>> cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))