def predict_test(self, output_filepath):
    """Predict on the test data and write one JSON line per returned sample.

    A ``SpanBasedModelForMultiRCPredictor`` is run over ``self.test_data``.
    Every sample in the predictor's ``samples`` output is updated in place
    with its ``pred`` and ``true`` entries plus a ``text`` entry (the
    sample's ``words`` joined by single spaces), serialized with
    ``json.dumps`` and handed to ``file_utils.write_lines``.

    :param output_filepath: destination passed to ``file_utils.write_lines``.
    """
    # -1 is handed to the predictor as cuda_device when CUDA is unavailable.
    cuda_device = (self.configuration['gpu_id']
                   if torch.cuda.is_available() else -1)
    predictor = pytorch_models.SpanBasedModelForMultiRCPredictor(
        self.model,
        self.val_iterator,
        self.distinct_polarities,
        configuration=self.configuration,
        cuda_device=cuda_device)

    prediction = predictor.predict(self.test_data)
    predicted = prediction['pred']
    gold = prediction['true']

    serialized = []
    for index, record in enumerate(prediction['samples']):
        # Indexed access (rather than zip) so a length mismatch still raises.
        record['pred'] = predicted[index]
        record['true'] = gold[index]
        record['text'] = ' '.join(record['words'])
        serialized.append(json.dumps(record))

    file_utils.write_lines(serialized, output_filepath)
def predict_backup(self, output_filepath='d:/correct_sentences.txt'):
    """Predict on two-aspect-term instances and save the fully correct texts.

    Only the test split (plus the hard-test split when
    ``self.hard_test_data`` is truthy) is evaluated. Within a split, only
    instances whose sample metadata lists exactly two aspect terms are
    passed to the predictor. A predicted sample counts as correct when, for
    every aspect term, the term's ``polarity`` equals element ``[1]`` of the
    matching entry in ``sentiment_outputs_for_aspect_terms``; the ``text``
    of each correct sample is collected and written out, and the raw
    prediction result is logged.

    :param output_filepath: where the correct sentences are written.
        Defaults to the path that used to be hard-coded, so existing calls
        without arguments behave as before.
    """
    # -1 is handed to the predictor as cuda_device when CUDA is unavailable.
    cuda_device = (self.configuration['gpu_id']
                   if torch.cuda.is_available() else -1)
    predictor = pytorch_models.SpanBasedModelPredictor(
        self.model,
        self.val_iterator,
        self.distinct_polarities,
        configuration=self.configuration,
        cuda_device=cuda_device)

    data_type_and_data = {'test': self.test_data}
    if self.hard_test_data:
        data_type_and_data['hard_test'] = self.hard_test_data

    for data_type, data_temp in data_type_and_data.items():
        # Restrict the evaluation to instances with exactly two aspect terms.
        data = [
            instance for instance in data_temp
            if len(instance.fields['sample'].metadata['aspect_terms']) == 2
        ]
        result = predictor.predict(data)

        correct_sentences = []
        for e in result:
            sentiment_outputs = e['sentiment_outputs_for_aspect_terms']
            aspect_terms = e['aspect_terms']
            # Indexed lookup (not zip) so a missing output still raises.
            if all(aspect_term.polarity == sentiment_outputs[i][1]
                   for i, aspect_term in enumerate(aspect_terms)):
                correct_sentences.append(e['text'])

        # NOTE(review): the same path is written once per split, so the
        # hard-test output presumably replaces the test output - confirm
        # against file_utils.write_lines.
        file_utils.write_lines(correct_sentences, output_filepath)

        self.logger.info('data_type: %s result: %s' % (data_type, result))
def predict_test(self, output_filepath):
    """Predict on the test data and write one JSON line per aspect term.

    For each predicted sample, every aspect term's character offset
    (``from_index``) is converted into a ``[start, end)`` word span over the
    text split on single spaces. Each output line holds the sample text, the
    aspect term encoded as ``<term>-<start>-<end>`` and element ``[1]`` of
    the matching entry in ``sentiment_outputs_for_aspect_terms``.

    If the words inside a computed span do not re-join to the term,
    ``error`` is printed and the line is emitted regardless.

    :param output_filepath: destination passed to ``file_utils.write_lines``.
    """
    # -1 is handed to the predictor as cuda_device when CUDA is unavailable.
    cuda_device = (self.configuration['gpu_id']
                   if torch.cuda.is_available() else -1)
    predictor = pytorch_models.SpanBasedModelPredictor(
        self.model,
        self.val_iterator,
        self.distinct_polarities,
        configuration=self.configuration,
        cuda_device=cuda_device)

    predictions = predictor.predict(self.test_data)

    json_lines = []
    for prediction in predictions:
        sentence = prediction['text']
        tokens = sentence.split(' ')
        terms = prediction['aspect_terms']

        # First pass: translate character offsets into word spans.
        spans = []
        for aspect_term in terms:
            char_offset = aspect_term.from_index
            surface = aspect_term.term
            if char_offset > 0:
                # Number of space-separated words preceding the term.
                first_word = len(sentence[:char_offset].strip().split(' '))
            else:
                first_word = 0
            spans.append([first_word, first_word + len(surface.split(' '))])

        sentiment_outputs = prediction['sentiment_outputs_for_aspect_terms']

        # Second pass: sanity-check each span and emit its JSON line.
        for position, span in enumerate(spans):
            surface = terms[position].term
            begin, end = span[0], span[1]
            if surface != ' '.join(tokens[begin:end]):
                print('error')
            sentiment = sentiment_outputs[position][1]
            json_lines.append(json.dumps({
                'text': sentence,
                'aspect_term': '%s-%d-%d' % (surface, begin, end),
                'sentiment': sentiment
            }))

    file_utils.write_lines(json_lines, output_filepath)
def error_analysis(self):
    """Collect the wrongly predicted labels of the test data.

    A ``TextInAllAspectSentimentOutPredictor`` is run over
    ``self.test_data``. For every instance, the true labels from its sample
    metadata are mapped through ``self.distinct_categories`` and
    ``self.distinct_polarities`` and compared with the predicted labels.
    Every prediction that differs from the true label becomes a row; the
    rows are written tab-separated to ``error_analysis.csv`` inside
    ``self.model_dir``.

    :return: list of tuples, starting with the header
        ``('sentence', 'aspect', 'predict', 'true')`` followed by one
        ``(sentence, aspect, predicted, true)`` tuple per wrong prediction.
    """
    # -1 is handed to the predictor as cuda_device when CUDA is unavailable.
    cuda_device = (self.configuration['gpu_id']
                   if torch.cuda.is_available() else -1)
    predictor = pytorch_models.TextInAllAspectSentimentOutPredictor(
        self.model,
        self.val_iterator,
        self.distinct_categories,
        self.distinct_polarities,
        configuration=self.configuration,
        cuda_device=cuda_device)

    instances = self.test_data
    predictions = predictor.predict(instances)

    rows = [('sentence', 'aspect', 'predict', 'true')]
    for index, instance in enumerate(instances):
        sample_metadata = instance.fields['sample'].metadata
        sentence = sample_metadata['text']
        # Each entry of metadata['labels'] is indexed as (category, polarity).
        gold_by_category = {
            self.distinct_categories[label[0]]:
            self.distinct_polarities[label[1]]
            for label in sample_metadata['labels']
        }
        for predicted_label in predictions[index]:
            category = predicted_label[0]
            gold = gold_by_category[category]
            predicted = predicted_label[1]
            if gold == predicted:
                # Correct prediction: nothing to report.
                continue
            rows.append((sentence, category, predicted, gold))

    tsv_lines = list(map('\t'.join, rows))
    target_path = os.path.join(self.model_dir, 'error_analysis.csv')
    file_utils.write_lines(tsv_lines, target_path)
    return rows
def predict_test(self, output_filepath):
    """Predict on the test data and write one JSON line per aspect term.

    Each output line holds the sample text, the ``opinion`` entry taken from
    the aspect term's metadata and the predicted sentiment (element ``[1]``
    of the matching entry in ``sentiment_outputs_for_aspect_terms``).

    When a sample carries a ``text_backup`` entry, that text is emitted
    instead of ``text``, and ``self.remove_backup_index`` is applied to the
    opinion's ``aspect_term`` and ``opinion_term`` before serialization.

    :param output_filepath: destination passed to ``file_utils.write_lines``.
    """
    # -1 is handed to the predictor as cuda_device when CUDA is unavailable.
    cuda_device = (self.configuration['gpu_id']
                   if torch.cuda.is_available() else -1)
    predictor = pytorch_models.SpanBasedModelPredictor(
        self.model,
        self.val_iterator,
        self.distinct_polarities,
        configuration=self.configuration,
        cuda_device=cuda_device)

    predictions = predictor.predict(self.test_data)

    json_lines = []
    for prediction in predictions:
        terms = prediction['aspect_terms']
        sentiment_outputs = prediction['sentiment_outputs_for_aspect_terms']
        for position, aspect_term in enumerate(terms):
            sentiment = sentiment_outputs[position][1]
            text = prediction['text']
            opinion = aspect_term.metadata['opinion']
            if 'text_backup' in prediction:
                # Prefer the backup text and clean both terms of the opinion.
                text = prediction['text_backup']
                self.remove_backup_index(opinion['aspect_term'])
                self.remove_backup_index(opinion['opinion_term'])
            json_lines.append(json.dumps({
                'text': text,
                'opinion': opinion,
                'sentiment_pred': sentiment
            }))

    file_utils.write_lines(json_lines, output_filepath)