Example #1
    def __init__(self, opt):
        self.opt = opt
        # map each supported dataset name to its train/test CSV files
        fname = {
            'cr': {
                'train': './datasets/cr/train.csv',
                'test': './datasets/cr/dev.csv'
            },
            'mr': {
                'train': './datasets/mr/train.csv',
                'test': './datasets/mr/dev.csv'
            },
            'mpqa': {
                'train': './datasets/mpqa/train.csv',
                'test': './datasets/mpqa/dev.csv'
            },
            'subj': {
                'train': './datasets/subj/train.csv',
                'test': './datasets/subj/dev.csv'
            },
            'sst2': {
                'train': './datasets/sst2/train.csv',
                'test': './datasets/sst2/test.csv'
            },
            'TREC': {
                'train': './datasets/TREC/train.csv',
                'test': './datasets/TREC/test.csv'
            },
        }
        # reuse a cached vocabulary if one was saved by a previous run
        if os.path.exists(opt.dataset + '_word2idx.pkl'):
            print("loading {0} tokenizer...".format(opt.dataset))
            with open(opt.dataset + '_word2idx.pkl', 'rb') as f:
                word2idx = pickle.load(f)
                self.tokenizer = Tokenizer(word2idx=word2idx)
        else:
            print("reading {0} dataset...".format(opt.dataset))

            text = ABSADatesetReader.__read_text__(
                [fname[opt.dataset]['train'], fname[opt.dataset]['test']])
            self.tokenizer = Tokenizer()
            self.tokenizer.fit_on_text(text)
            with open(opt.dataset + '_word2idx.pkl', 'wb') as f:
                pickle.dump(self.tokenizer.word2idx, f)
        embedding_matrix = build_embedding_matrix(self.tokenizer.word2idx,
                                                  opt.embed_dim, opt.dataset)
        self.model = opt.model_class(embedding_matrix, opt).to(opt.device)
        print('loading model {0} ...'.format(opt.model_name))
        self.model.load_state_dict(torch.load(opt.state_dict_path))
        # switch the model to evaluation mode and disable gradient tracking for inference
        self.model.eval()
        torch.autograd.set_grad_enabled(False)
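The if/else above implements a simple cache-on-first-use pattern for the vocabulary: fit the tokenizer once, persist its word2idx mapping with pickle, and reload it on later runs. A minimal, self-contained sketch of that pattern follows; the function name and the toy vocabulary builder are illustrative assumptions, not part of the original code.

import os
import pickle

def load_or_build_word2idx(dataset, build_vocab):
    # cache file named after the dataset, as in the constructor above
    cache = dataset + '_word2idx.pkl'
    if os.path.exists(cache):
        with open(cache, 'rb') as f:
            return pickle.load(f)
    word2idx = build_vocab()          # e.g. fit a Tokenizer on the raw text
    with open(cache, 'wb') as f:
        pickle.dump(word2idx, f)
    return word2idx

# illustrative usage with a toy vocabulary builder
vocab = load_or_build_word2idx('demo', lambda: {'hello': 1, 'world': 2})
print(len(vocab))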
Example #2
    def __init__(self, opt):
        self.opt = opt
        # map each supported dataset name to its train/test .raw files
        fname = {
            'twitter': {
                'train': './datasets/acl-14-short-data/train.raw',
                'test': './datasets/acl-14-short-data/test.raw'
            },
            'rest14': {
                'train': './datasets/semeval14/restaurant_train.raw',
                'test': './datasets/semeval14/restaurant_test.raw'
            },
            'lap14': {
                'train': './datasets/semeval14/laptop_train.raw',
                'test': './datasets/semeval14/laptop_test.raw'
            },
            'rest15': {
                'train': './datasets/semeval15/restaurant_train.raw',
                'test': './datasets/semeval15/restaurant_test.raw'
            },
            'rest16': {
                'train': './datasets/semeval16/restaurant_train.raw',
                'test': './datasets/semeval16/restaurant_test.raw'
            },
        }
        # reuse a cached vocabulary if one was saved by a previous run
        if os.path.exists(opt.dataset + '_word2idx.pkl'):
            print("loading {0} tokenizer...".format(opt.dataset))
            with open(opt.dataset + '_word2idx.pkl', 'rb') as f:
                word2idx = pickle.load(f)
                self.tokenizer = Tokenizer(word2idx=word2idx)
        else:
            print("reading {0} dataset...".format(opt.dataset))

            text = ABSADatesetReader.__read_text__(
                [fname[opt.dataset]['train'], fname[opt.dataset]['test']])
            self.tokenizer = Tokenizer()
            self.tokenizer.fit_on_text(text)
            with open(opt.dataset + '_word2idx.pkl', 'wb') as f:
                pickle.dump(self.tokenizer.word2idx, f)
        embedding_matrix = build_embedding_matrix(self.tokenizer.word2idx,
                                                  opt.embed_dim, opt.dataset)
        self.model = opt.model_class(embedding_matrix, opt).to(opt.device)
        print('loading model {0} ...'.format(opt.model_name))
        self.model.load_state_dict(torch.load(opt.state_dict_path))
        # switch the model to evaluation mode and disable gradient tracking for inference
        self.model.eval()
        torch.autograd.set_grad_enabled(False)
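The final call in both constructors turns off autograd globally, so every later forward pass through self.model runs without building a computation graph, much like wrapping each call in torch.no_grad(). A small sketch of the effect; the tensors here are illustrative only.

import torch

torch.autograd.set_grad_enabled(False)    # global switch, as in the constructor above

x = torch.randn(2, 3, requires_grad=True)
y = (x * 2).sum()
print(y.requires_grad)                    # False: no graph was recorded

torch.autograd.set_grad_enabled(True)     # restore the default if training resumes later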