def __init__(self, opt):
    """Set up data loaders, embeddings and the model for a training run.

    Args:
        opt: Options object. This method reads ``v2``, ``data_dir``,
            ``embed_dim``, ``dataset``, ``batch_size``, ``model_class``
            and ``device`` from it.
    """
    self.opt = opt

    # ``opt.v2`` selects the V2 reader; otherwise the default reader is used.
    reader_cls = ABSADataReaderV2 if opt.v2 else ABSADataReader
    reader = reader_cls(data_dir=opt.data_dir)

    tokenizer = build_tokenizer(data_dir=opt.data_dir)
    embedding_matrix = build_embedding_matrix(
        opt.data_dir,
        tokenizer.word2idx,
        opt.embed_dim,
        opt.dataset,
    )

    # Index -> label lookups exposed by the reader; also passed to the model.
    tag_lookup = reader.reverse_tag_map
    polarity_lookup = reader.reverse_polarity_map
    self.idx2tag = tag_lookup
    self.idx2polarity = polarity_lookup

    def make_loader(samples, *, shuffle):
        # All three splits share the batch size; only shuffling differs.
        return BucketIterator(
            data=samples, batch_size=opt.batch_size, shuffle=shuffle)

    # Only the training split is shuffled.
    self.train_data_loader = make_loader(
        reader.get_train(tokenizer), shuffle=True)
    self.dev_data_loader = make_loader(
        reader.get_dev(tokenizer), shuffle=False)
    self.test_data_loader = make_loader(
        reader.get_test(tokenizer), shuffle=False)

    net = opt.model_class(
        embedding_matrix, opt, self.idx2tag, self.idx2polarity)
    self.model = net.to(opt.device)

    # ``_print_args`` is defined elsewhere on this class.
    self._print_args()

    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated(device=opt.device.index)
        print('>>> cuda memory allocated:', allocated)
def __init__(self, opt):
    """Build the model, restore its weights and prepare it for inference.

    Args:
        opt: Options object. This method reads ``data_dir``, ``embed_dim``,
            ``dataset``, ``model_class``, ``device``, ``model_name`` and
            ``state_dict_path`` from it.

    Note:
        Gradient tracking is switched off globally through
        ``torch.autograd.set_grad_enabled(False)`` as the last step.
    """
    self.opt = opt

    reader = ABSADataReader(data_dir=opt.data_dir)
    self.tokenizer = build_tokenizer(data_dir=opt.data_dir)
    embedding_matrix = build_embedding_matrix(
        opt.data_dir,
        self.tokenizer.word2idx,
        opt.embed_dim,
        opt.dataset,
    )

    # Index -> label lookups exposed by the reader; also passed to the model.
    tag_lookup = reader.reverse_tag_map
    polarity_lookup = reader.reverse_polarity_map
    self.idx2tag = tag_lookup
    self.idx2polarity = polarity_lookup

    net = opt.model_class(
        embedding_matrix, opt, self.idx2tag, self.idx2polarity)
    self.model = net.to(opt.device)

    print('loading model {0} ...'.format(opt.model_name))

    def _keep_storage(storage, loc):
        # Hand every storage back unchanged instead of remapping it to the
        # device it was saved from.
        return storage

    state_dict = torch.load(opt.state_dict_path, map_location=_keep_storage)
    self.model.load_state_dict(state_dict)

    # switch model to evaluation mode
    self.model.eval()
    torch.autograd.set_grad_enabled(False)
def __init__(self, opt):
    """Build the model for inference and open a handle on the S3 bucket.

    AWS credentials are no longer embedded in the source. ``boto3.Session``
    is created without explicit keys, so boto3 resolves credentials through
    its default provider chain (environment variables, shared
    credentials/config files, attached role, ...).

    Args:
        opt: Options object. This method reads ``data_dir``, ``embed_dim``,
            ``dataset``, ``model_class``, ``device`` and ``model_name``.
            Two optional attributes are honoured when present:
            ``aws_region`` (region name for the session; falls back to
            boto3's own configuration when absent) and ``s3_bucket``
            (bucket name; defaults to ``'surveybuddy-responses'``).

    Note:
        Gradient tracking is switched off globally through
        ``torch.autograd.set_grad_enabled(False)`` as the last step.
    """
    self.opt = opt

    absa_data_reader = ABSADataReader(data_dir=opt.data_dir)
    self.tokenizer = build_tokenizer(data_dir=opt.data_dir)
    embedding_matrix = build_embedding_matrix(
        opt.data_dir,
        self.tokenizer.word2idx,
        opt.embed_dim,
        opt.dataset,
    )

    # Index -> label lookups exposed by the reader; also passed to the model.
    self.idx2tag = absa_data_reader.reverse_tag_map
    self.idx2polarity = absa_data_reader.reverse_polarity_map

    self.model = opt.model_class(
        embedding_matrix, opt, self.idx2tag, self.idx2polarity,
    ).to(opt.device)

    print('loading model {0} ...'.format(opt.model_name))
    # NOTE(review): the checkpoint restore below was already disabled, so the
    # model keeps whatever weights ``opt.model_class`` initialises it with.
    # Confirm this is intentional before relying on predictions.
    # self.model.load_state_dict(torch.load(opt.state_dict_path, map_location=lambda storage, loc: storage))

    # switch model to evaluation mode
    self.model.eval()

    # Get a handle on S3. Secrets must come from the environment or AWS
    # config, never from literals committed to the repository.
    session = boto3.Session(region_name=getattr(opt, 'aws_region', None))
    self.s3 = session.resource('s3')
    # The original comment listed 'energy_market_procesing' as another
    # example bucket name.
    bucket_name = getattr(opt, 's3_bucket', 'surveybuddy-responses')
    self.bucket = self.s3.Bucket(bucket_name)

    torch.autograd.set_grad_enabled(False)
def __init__(self, opt):
    """Set up data loaders, the model and metric containers for training.

    Models named in ``bert_variants`` use the BERT reader, tokenizer and
    bucket iterator with an empty embedding matrix. Every other model uses
    the default reader and tokenizer plus an embedding matrix built by
    ``build_embedding_matrix``.

    Args:
        opt: Options object. This method reads ``model``, ``data_dir``,
            ``batch_size``, ``model_class`` and ``device``; the BERT branch
            also reads ``bert_model``, ``case``, ``spacy_lang`` and
            ``lang``; the other branch also reads ``embed_dim``,
            ``dataset`` and ``glove_fname``.
    """
    self.opt = opt

    bert_variants = (
        'bote',
        'bote_v0_ablation',
        'bote_v1_ablation',
        'bote_v2_ablation',
        'bote_v3_ablation',
        'bote_v4',
    )

    if opt.model in bert_variants:
        reader = ABSADataReaderBERT(data_dir=opt.data_dir)
        tokenizer = BertTokenizer(
            opt.bert_model, opt.case, opt.spacy_lang, opt.lang)
        # The BERT variants are handed an empty embedding matrix.
        embedding_matrix = []
        iterator_cls = BucketIteratorBert
    else:
        reader = ABSADataReader(data_dir=opt.data_dir)
        tokenizer = build_tokenizer(data_dir=opt.data_dir)
        embedding_matrix = build_embedding_matrix(
            opt.data_dir,
            tokenizer.word2idx,
            opt.embed_dim,
            opt.dataset,
            opt.glove_fname,
        )
        iterator_cls = BucketIterator

    def make_loader(samples, *, shuffle):
        # All three splits share the batch size; only shuffling differs.
        return iterator_cls(
            data=samples, batch_size=opt.batch_size, shuffle=shuffle)

    # Only the training split is shuffled.
    self.train_data_loader = make_loader(
        reader.get_train(tokenizer), shuffle=True)
    self.dev_data_loader = make_loader(
        reader.get_dev(tokenizer), shuffle=False)
    self.test_data_loader = make_loader(
        reader.get_test(tokenizer), shuffle=False)

    # Index -> label lookups exposed by the reader; also passed to the model.
    tag_lookup = reader.reverse_tag_map
    polarity_lookup = reader.reverse_polarity_map
    self.idx2tag = tag_lookup
    self.idx2polarity = polarity_lookup

    net = opt.model_class(
        embedding_matrix, opt, self.idx2tag, self.idx2polarity)
    self.model = net.to(opt.device)

    score_names = ('precision', 'recall', 'f1')

    # Keys are 'epoch', 'step', then '<split>_<task>_<score>' for every
    # split in (train, dev) and task in (ap, op, triplet), e.g.
    # 'train_ap_precision' ... 'dev_triplet_f1'. Each key gets its own list.
    history = {'epoch': [], 'step': []}
    for split in ('train', 'dev'):
        for task in ('ap', 'op', 'triplet'):
            for score in score_names:
                history['{}_{}_{}'.format(split, task, score)] = []
    self.history_metrics = history

    # One precision/recall/f1 list triple per extraction task.
    self.results = {
        task: {score: [] for score in score_names}
        for task in (
            'aspect_extraction',
            'opinion_extraction',
            'triplet_extraction',
        )
    }

    # ``_print_args`` is defined elsewhere on this class.
    self._print_args()

    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated(device=opt.device.index)
        print('>>> cuda memory allocated:', allocated)