def save_train_sentiment_value_result(y_pred, y_data, out_file_path):
    """Write training-set sentiment-value predictions to a CSV for inspection.

    Args:
        y_pred: per-sample class scores; list of float lists or numpy array.
        y_data: training lines; each is comma-separated and, per the indexing
            below, starts with a space-separated one-hot gold vector and ends
            with an integer class index.  # assumes this layout — TODO confirm
        out_file_path: destination file for the rendered rows.
    """
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()
    predict_labels = convert_sentiment_value_predict(y_pred)
    id_labels = merge(y_data, predict_labels)
    result = []
    for i, id_label in enumerate(id_labels):
        parts = id_label.split(',')
        # p: position of the gold class inside the one-hot vector `a`
        # (last field is the predicted index; +1 skips a leading token).
        # NOTE(review): offset semantics inferred from indexing — verify.
        p = int(parts[-1]) + 1
        a = [e for e in parts[0].split()]
        # Render each probability with two decimals, space-joined.
        pred_p = [('%.2f' % num) for num in y_pred[i]]
        parts.append(' '.join(pred_p))
        # Reorder columns into the output layout; the first column maps the
        # one-hot gold vector to its label name.
        temp = [
            sentiment_onehot_label[parts[0]], parts[4], parts[5], parts[2],
            parts[1], parts[3]
        ]
        parts = temp
        # Correctly-predicted rows (gold position holds '1') go to the front
        # of the file; mispredictions are appended at the end.
        if a[p] != '1':
            result.append(','.join(parts))
        else:
            result.insert(0, ','.join(parts))
    # Header row: five empty columns then 'id'.
    result = [',,,,,id'] + result
    file_utils.write_lines(result, out_file_path)
def convert_subject_sentiment_value_predict_result(
        subject_subject_sentiment_value_file_path, result_file_path):
    """Fan out "id,subject|subject|..." lines into one CSV row per subject.

    Each subject token may embed its sentiment value after an underscore;
    the underscore is rewritten as a CSV separator.
    # NOTE(review): token format inferred from the replace('_', ',') — verify.
    """
    input_lines = file_utils.read_all_lines(
        subject_subject_sentiment_value_file_path)
    output_rows = ['content_id,subject,sentiment_value,sentiment_word']
    for input_line in input_lines:
        fields = input_line.split(',')
        content_id = fields[0]
        for token in fields[1].split('|'):
            output_rows.append(content_id + ',' + token.replace('_', ',') + ',')
    file_utils.write_lines(output_rows, result_file_path)
def save_subject_result(y_pred, id_test, model_name, is_val=False):
    """Persist subject probabilities and thresholded labels to disk.

    Args:
        y_pred: per-sample subject scores; list of float lists or numpy array.
        id_test: id lines paired element-wise with each prediction row.
        model_name: suffix appended to the probability file name.
        is_val: write to validation paths when True, test paths otherwise.
    """
    if y_pred is None:
        return
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()
    # Probability file: header row plus one "id,p0,p1,..." line per sample.
    probability_rows = merge(id_test,
                             convert_predict_for_probability_output(y_pred))
    header = 'id,' + ','.join(label_mapping.subject_mapping_list)
    probability_path = (data_path.val_subject_probability_result_file_path
                        if is_val else
                        data_path.test_subject_probability_result_file_path)
    file_utils.write_lines([header] + probability_rows,
                           probability_path + '.' + model_name)
    # Label file: hard predictions above the configured positive threshold.
    label_rows = merge(
        id_test,
        convert_subject_predict(
            y_pred, threshold=thresholds.topic_positive_threshold))
    label_path = (data_path.val_subject_result_file_path
                  if is_val else data_path.test_subject_result_file_path)
    file_utils.write_lines(label_rows, label_path)
def save_sentiment_value_result(y_pred, id_test, model_name, is_val=False):
    """Persist sentiment-value probabilities and hard labels to disk.

    Args:
        y_pred: per-sample class scores; list of float lists or numpy array.
        id_test: id lines paired element-wise with each prediction row.
        model_name: suffix appended to the probability file name.
        is_val: write to validation paths when True, test paths otherwise.
    """
    if y_pred is None:
        return
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()
    # Probability file: header row plus one "id,p0,p1,..." line per sample.
    header = 'id,' + ','.join(label_mapping.sentiment_value_mapping_list)
    probability_rows = merge(id_test,
                             convert_predict_for_probability_output(y_pred))
    if is_val:
        probability_path = (
            data_path.val_sentiment_value_probability_result_file_path)
    else:
        probability_path = (
            data_path.test_public_sentiment_value_probability_result_file_path)
    file_utils.write_lines([header] + probability_rows,
                           probability_path + '.' + model_name)
    # Label file: argmax-style hard sentiment predictions.
    label_rows = merge(id_test, convert_sentiment_value_predict(y_pred))
    if is_val:
        label_path = data_path.val_sentiment_value_result_file_path
    else:
        label_path = data_path.test_public_sentiment_value_result_file_path
    file_utils.write_lines(label_rows, label_path)
def merge_subject_sentiment_value(subject_file_path, sentiment_file_path,
                                  result_file_path):
    """Join subject predictions with sentiment predictions line-by-line.

    The two input files are assumed to be aligned row-for-row: row i of the
    sentiment file supplies the sentiment value for row i of the subject
    file.  # NOTE(review): alignment is positional, not keyed by id — verify.

    Args:
        subject_file_path: CSV whose column 0 is the content id and column 2
            the predicted subject.
        sentiment_file_path: CSV whose column 1 is the sentiment value.
        result_file_path: destination CSV with a submission-style header.
    """
    subject_file_lines = file_utils.read_all_lines(subject_file_path)
    sentiment_file_lines = file_utils.read_all_lines(sentiment_file_path)
    result = ['content_id,subject,sentiment_value,sentiment_word']
    for i, subject_line in enumerate(subject_file_lines):
        subject_line_parts = subject_line.split(',')
        sentiment_value = sentiment_file_lines[i].split(',')[1]
        # Trailing comma leaves the sentiment_word column empty.
        result.append(subject_line_parts[0] + ',' + subject_line_parts[2] +
                      ',' + sentiment_value + ',')
    file_utils.write_lines(result, result_file_path)
def save_train_subject_result(y_pred, y_data, model_name):
    """Dump misclassified training samples (subject task) for error analysis.

    Only rows where the thresholded prediction differs from the gold label
    are written; the output is sorted so mismatching rows group together.

    Args:
        y_pred: per-sample subject scores; list of float lists or numpy array.
        y_data: training lines whose first comma field is a space-separated
            one-hot gold vector.
        model_name: unused.  # NOTE(review): dead parameter — confirm callers.
    """
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()
    predict_labels = convert_subject_predict(
        y_pred, threshold=thresholds.topic_positive_threshold)
    id_labels = merge(y_data, predict_labels)
    # Gold labels: parse the one-hot vector out of each training line and run
    # it through the same thresholding as the predictions.
    y_true = [[float(p) for p in data.split(',')[0].split()] for data in y_data]
    true_labels = convert_subject_predict(
        y_true, threshold=thresholds.topic_positive_threshold)
    y_pred_probability = convert_predict_for_probability_output(y_pred)
    result = []
    # Header: four empty columns then 'id'.
    result.append(',,,,id')
    for i, id_label in enumerate(id_labels):
        if true_labels[i] == predict_labels[i]:
            continue  # keep only misclassified samples
        parts = id_label.split(',')
        # Column 0 becomes the gold label instead of the one-hot vector.
        parts[0] = true_labels[i]
        # Render per-class probabilities as "label:prob" pairs, space-joined.
        pred_p = y_pred_probability[i]
        pred_p_elements = pred_p.split(',')
        pred_p_str_list = []
        for j in range(len(pred_p_elements)):
            label = label_mapping.subject_mapping_reverse[str(j)]
            pred_p_str_list.append(label + ':' + pred_p_elements[j])
        parts.insert(1, ' '.join(pred_p_str_list))
        # Move the last field (predicted label) up next to the gold label.
        parts.insert(1, parts[-1])
        del parts[-1]
        result.append(','.join(parts))
    result.sort()
    file_utils.write_lines(result, data_path.train_subject_result_file_path)
parts = train_data_line.split(',') if parts[1] not in train_data_content_line_map: train_data_content_line_map[parts[1]] = [] train_data_content_line_map[parts[1]].append(train_data_line) test_public_for_sentiment_lines = file_utils.read_all_lines( data_path.test_public_for_sentiment_value_file_path) result = ['content_id,subject,sentiment_value,sentiment_word'] in_train_data = set() in_train_data_for_submit = [] for test_public_for_sentiment_line in test_public_for_sentiment_lines: parts = test_public_for_sentiment_line.split(',') if parts[1] in train_data_content_line_map: if parts[1] not in in_train_data: in_train_data.add(parts[1]) in_train_data_samples = train_data_content_line_map[parts[1]] for in_train_data_sample in in_train_data_samples: in_train_data_sample_parts = in_train_data_sample.split(',') result.append(parts[0] + ',' + in_train_data_sample_parts[2] + ',' + in_train_data_sample_parts[3] + ',') in_train_data_for_submit.append(parts[0] + ',' + in_train_data_sample_parts[2] + ',' + in_train_data_sample_parts[3] + ',') else: result.append(parts[0] + ',' + parts[2] + ',0,') file_utils.write_lines(result, data_path.data_base_dir + 'all_zero.result') file_utils.write_lines(in_train_data_for_submit, data_path.data_base_dir + 'in_train_data_for_submit')
# -*- coding: utf-8 -*-
"""
Date: 2018/10/12 15:32
"""
from nlp_tasks.absa.conf import data_path
from nlp_tasks.absa.preprocess import label_mapping
from nlp_tasks.absa.utils import file_utils

if __name__ == '__main__':
    # Concatenate the per-topic sentiment result files (one file per
    # subject, suffixed with the topic name) into a single result file.
    merged_lines = []
    for topic in label_mapping.subject_mapping.keys():
        topic_file = (data_path.test_public_sentiment_value_result_file_path +
                      '.' + topic)
        merged_lines.extend(file_utils.read_all_lines(topic_file))
    file_utils.write_lines(
        merged_lines, data_path.test_public_sentiment_value_result_file_path)
from nlp_tasks.absa.conf import data_path
from nlp_tasks.absa.utils import file_utils

# Rows whose answers were copied verbatim from the training data; their ids
# must replace any model predictions in the final submission.
in_train_data_for_submit = file_utils.read_all_lines(
    data_path.data_base_dir + 'in_train_data_for_submit')
# Build a set once: membership is tested for every result line below
# (the original list made this loop O(n*m)).
in_train_data_for_submit_ids = {
    line.split(',')[0] for line in in_train_data_for_submit
}

result_file_name = 'test_public.result_20181028232554_caokong_xingneng.csv'
result = file_utils.read_all_lines(data_path.data_base_dir + result_file_name)

# Keep the CSV header, drop predicted rows whose id already has a
# train-data answer, then append the train-data rows at the end.
merge_result = [result.pop(0)]
for line in result:
    line_id = line.split(',')[0]  # renamed: avoid shadowing builtin id()
    if line_id not in in_train_data_for_submit_ids:
        merge_result.append(line)
merge_result.extend(in_train_data_for_submit)

file_utils.write_lines(
    merge_result,
    data_path.data_base_dir + result_file_name + '.merge_result_and_in_train')