def get_patterns_from_dict(event_elements):
    """
    Turn the extracted event elements into a flat feature dictionary.

    :param event_elements: event elements in dictionary form; the entries
        "加刑因素", "主次责任" and "减刑因素" are read
    :return patterns: features in dictionary form, keyed by feature name
    """
    # (feature name, keywords passed to find_element) for the features that
    # are looked up in the "加刑因素" entry.
    aggravating_specs = (
        ("05是否酒后驾驶", ("酒",)),
        ("06是否吸毒后驾驶", ("毒",)),
        ("07是否无证驾驶", ("驾驶证", "证")),
        ("08是否无牌驾驶", ("牌照", "牌")),
        ("09是否不安全驾驶", ("安全",)),
        ("10是否超载", ("超载",)),
        ("11是否逃逸", ("逃逸", "逃离")),
    )
    # Same layout for the features looked up in the "减刑因素" entry.
    mitigating_specs = (
        ("12是否抢救伤者", ("抢救", "施救")),
        ("13是否报警", ("报警", "自首", "投案")),
        ("14是否现场等待", ("现场", "等候")),
        ("15是否赔偿", ("赔偿",)),
        ("16是否认罪", ("认罪",)),
        ("17是否如实供述", ("如实",)),
    )

    aggravating = event_elements["加刑因素"]
    features = {}

    # Three casualty features (deaths, serious injuries, minor injuries) come
    # from the concatenated "加刑因素" text.
    deaths, serious, minor = extract_seg("".join(aggravating))
    features["01死亡人数"] = deaths
    features["02重伤人数"] = serious
    features["03轻伤人数"] = minor

    # Responsibility feature: search the "主次责任" entry for "全部责任".
    features["04责任认定"] = find_element(event_elements["主次责任"], "全部责任")

    for feature_name, keywords in aggravating_specs:
        features[feature_name] = find_element(aggravating, *keywords)

    # Unnumbered helper entry: 1 minus the integer value of the "前科" lookup.
    features["是否初犯偶犯"] = 1 - int(find_element(aggravating, "前科"))

    mitigating = event_elements["减刑因素"]
    for feature_name, keywords in mitigating_specs:
        features[feature_name] = find_element(mitigating, *keywords)

    # Numbered feature 18 is the helper entry above rendered as "0" / "1".
    features["18是否初犯偶犯"] = "0" if features["是否初犯偶犯"] == 0 else "1"

    return features
"/home/zhangshiwei/Event-Extraction/01数据预处理/preprocessed_data.txt", is_label=True) num_cases = len( glob.glob( "/home/zhangshiwei/Event-Extraction/06判决结果预测/特征提取/data/单个案件/*.txt")) f1 = open("/home/zhangshiwei/Event-Extraction/01数据预处理/preprocessed_data.txt", "r", encoding="utf-8") contents = f1.readlines() for i in range(1, num_cases + 1): file_name = "data/单个案件/" + str(i) + ".txt" result = get_event_elements(file_name) patterns = get_patterns_from_dict(result) # 因为目前CRF提取的效果还不够好,伤亡情况可能没有提取出来 # 保险起见,对整个案件进行提取死亡人数等3个特征,而非事件抽取的结果 patterns['01死亡人数'], patterns['02重伤人数'], patterns['03轻伤人数'] = extract_seg( contents[i - 1]) patterns["判决结果"] = labels[i - 1] del patterns["是否初犯偶犯"] del patterns["03轻伤人数"] del patterns["17是否如实供述"] rows.append(patterns) f1.close() # 写回数据 with open("data.csv", "w", newline='') as f: f_csv = csv.DictWriter(f, headers) f_csv.writeheader() f_csv.writerows(rows)
] rows = [] # 提取标签 labels = label_case("preprocessed_data.txt", is_label=True) num_cases = 1 f1 = open("preprocessed_data.txt", "r", encoding="utf-8") cases = f1.readlines() event_elements = get_event_elements("CRF结果.txt") patterns = get_patterns_from_dict(event_elements) # 因为目前CRF提取的效果还不够好,伤亡情况可能没有提取出来 # 保险起见,对整个案件进行提取死亡人数等3个特征,而非事件抽取的结果 patterns['01死亡人数'], patterns['02重伤人数'], patterns['03轻伤人数'] = extract_seg(line2) patterns["判决结果"] = labels[0] del patterns["是否初犯偶犯"] del patterns["03轻伤人数"] del patterns["17是否如实供述"] rows.append(patterns) f1.close() # 写回数据 with open("pattern.csv", "w", newline='') as f: f_csv = csv.DictWriter(f, headers) # f_csv.writeheader() f_csv.writerows(rows) f3 = open("pattern.csv", "r") f3_csv = list(csv.reader(f3))
# The number of cases is the number of per-case .txt files in this directory.
num_cases = len(
    glob.glob(
        "/home/zhangshiwei/Event-Extraction/06判决结果预测/特征提取/data/单个案件/*.txt"))

# Read every line of the preprocessed data up front. The `with` block
# guarantees the handle is closed even if processing below raises.
with open("/home/zhangshiwei/Event-Extraction/01数据预处理/preprocessed_data.txt",
          "r", encoding="utf-8") as f1:
    cases = f1.readlines()

for i in range(1, num_cases + 1):
    # Per-case files are numbered from 1; `cases` and `labels` are 0-indexed.
    file_name = "data/单个案件/" + str(i) + ".txt"
    event_elements = get_event_elements(file_name)
    patterns = get_patterns_from_dict(event_elements)
    # The CRF extraction is not yet good enough, so casualty information may
    # be missing from the event elements. To be safe, the three casualty
    # features are re-extracted from the whole case text instead of from the
    # event-extraction result.
    patterns['01死亡人数'], patterns['02重伤人数'], patterns['03轻伤人数'] = extract_seg(
        cases[i - 1])
    patterns["判决结果"] = labels[i - 1]
    # These three entries are removed from the row before it is stored.
    del patterns["是否初犯偶犯"]
    del patterns["03轻伤人数"]
    del patterns["17是否如实供述"]
    rows.append(patterns)

# Write the data back. No header row is emitted (writeheader() is not called);
# only the data rows are written, in the column order given by `headers`.
with open("data.csv", "w", newline='') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writerows(rows)