def violate_punishment_save(is_contain_accord, violate_punishment_accord_info, law_id):
    """Store one violation/punishment record in ``violate_punishment_accord``.

    When ``is_contain_accord`` equals 1 the "accord" columns are written as
    well; otherwise only the violate and punishment columns are filled.

    Each ``*_info`` entry of ``violate_punishment_accord_info`` is indexed
    0..3; positions 0-2 go to the chapter/article/sentence id columns and
    position 3 to the matching ``*_content`` column.  ``law_id`` is used for
    every ``*_law_id`` column.

    A failure while building the parameters or executing the insert is
    rolled back and printed; it is not re-raised.
    """
    db_cursor = conn.cursor()
    with_accord = is_contain_accord == 1

    if with_accord:
        statement = (
            'insert into violate_punishment_accord (violate_law_id, violate_chapter_id, '
            'violate_article_id, violate_sentence_id, punishment_law_id, punishment_chapter_id, '
            'punishment_article_id, punishment_sentence_id, accord_law_id, accord_chapter_id, '
            'accord_article_id, accord_sentence_id, violate_content, punishment_content, '
            'accord_content, is_contain_accord) '
            'value (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        )
    else:
        statement = (
            'insert into violate_punishment_accord (violate_law_id, violate_chapter_id, '
            'violate_article_id, violate_sentence_id, punishment_law_id, punishment_chapter_id, '
            'punishment_article_id, punishment_sentence_id, violate_content, punishment_content, '
            'is_contain_accord) '
            'value (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        )

    violate = violate_punishment_accord_info['violate_info']
    punishment = violate_punishment_accord_info['punishment_info']
    accord = violate_punishment_accord_info['accord_info'] if with_accord else None

    try:
        # Parameters are assembled inside the try so that a short info
        # sequence is reported the same way as a database error.
        params = (law_id, violate[0], violate[1], violate[2],
                  law_id, punishment[0], punishment[1], punishment[2])
        if with_accord:
            params += (law_id, accord[0], accord[1], accord[2],
                       violate[3], punishment[3], accord[3])
        else:
            params += (violate[3], punishment[3])
        params += (int(is_contain_accord),)
        db_cursor.execute(statement, params)
        conn.commit()
    except Exception as e:
        conn.rollback()
        print(e)
def law_info_segment():
    """Rewrite every LAW_PROPERTY column of every ``law`` row.

    For each row, each column named in ``LAW_PROPERTY`` is read from the
    position that ``LAW_PROPERTY`` maps it to.  A value of ``None`` or ``''``
    is replaced by ``'未知'``; the (possibly unchanged) value is then written
    back and committed, and the before/after pair is printed.
    """
    db_cursor = conn.cursor()
    db_cursor.execute('''select * from law''')
    rows = db_cursor.fetchall()
    column_names = list(LAW_PROPERTY)

    for row in rows:
        row_id = row[0]
        for column in column_names:
            position = LAW_PROPERTY[column]
            stored = row[position]
            value = '未知' if stored is None or stored == '' else stored
            # The column name is interpolated into the SQL text; the value
            # and id remain driver placeholders.
            statement = 'update law set %s' % column + ' = %s where id = %s'
            db_cursor.execute(statement, (value, row_id))
            conn.commit()
            print(row_id, column, row[position], value)
def insert_new_relation_base_type(relation_type, data):
    """Insert one relation row into the ``new_<relation_type>_relation`` table.

    ``data`` is indexed by the keys ``law_id``, ``chapter_id``,
    ``sentence_id``, ``parse_sentence``, ``subject``, ``relation`` and
    ``object``; ``relation_type`` is stored in the ``relation_type`` column
    and also selects the target table.  Errors are not caught here.
    """
    db_cursor = conn.cursor()
    target_table = 'new_' + relation_type + '_relation'
    statement = (
        "insert into %s" % target_table
        + "(law_id, chapter_id, sentence_id, parse_sentence, subject, relation, object, relation_type) "
          "value (%s, %s, %s, %s, %s, %s, %s, %s)"
    )
    field_order = ('law_id', 'chapter_id', 'sentence_id', 'parse_sentence',
                   'subject', 'relation', 'object')
    params = tuple(data[field] for field in field_order) + (relation_type,)
    db_cursor.execute(statement, params)
    conn.commit()
    print(relation_type, 'insert success!')
def article_2_key_process():
    """Normalize ``article_2.a_key`` values that lack '条' to the form '第XX条'.

    Every row whose key does not contain '条' gets its '、' characters removed
    and is wrapped as ``'第' + key + '条'``.  Each update is committed
    individually; a failing update is rolled back and reported, and
    processing continues with the next row.  Rows with a NULL key are skipped.
    """
    select_sql = "select id, a_key from article_2"
    update_sql = "update article_2 set a_key = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    articles = cursor.fetchall()
    for article in articles:
        article_id = article[0]
        if article[1] is None:
            # Nothing to normalize; '条' in None would raise TypeError.
            continue
        raw_key = str(article[1])
        if '条' in raw_key:
            continue
        article_key = '第' + raw_key.replace('、', '') + '条'
        try:
            cursor.execute(update_sql, (article_key, article_id))
            conn.commit()
            print(str(article_id) + article_key
                  + '--------------------UPDATE SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object itself raised
            # TypeError inside the handler and hid the real error.
            print('\033[1;32;41m' + str(article_id) + article_key + str(e)
                  + ': ARTICLE FAILED---------' + '\033[0m')
def update_forestry_subject():
    """Add subjects from ``forbid_1`` that ``forestry_subject`` does not have yet.

    Known subjects are read from column index 1 of ``forestry_subject``.
    Each distinct ``forbid_1.forbid_subject`` that is not yet known is
    inserted, committed and printed.
    """
    select_known = '''select * from forestry_subject'''
    select_candidates = '''select forbid_subject from forbid_1 group by forbid_subject'''
    insert_subject = '''insert into forestry_subject (subject) value (%s)'''

    CURSOR.execute(select_known)
    known_subjects = [row[1] for row in CURSOR.fetchall()]

    CURSOR.execute(select_candidates)
    candidate_rows = CURSOR.fetchall()
    for row in candidate_rows:
        candidate = row[0]
        if candidate not in known_subjects:
            known_subjects.append(candidate)
            CURSOR.execute(insert_subject, (candidate,))
            conn.commit()
            print(candidate)
def class_one_sentences_extracte():
    """Extract sentences from "class one" rows of ``law_content_parse``.

    A row is class one when its ``p_key`` matches ``第...章`` or ``第...条`` at
    the start.  If the content contains ':' the whole content is stored as
    one non-single sentence (``is_single = 0``); otherwise the content is
    split on newlines and every non-empty piece is stored with
    ``is_single = 1``.  Each insert is committed on its own; failures are
    printed and rolled back.
    """
    key_pattern = re.compile("第(.*?)(?:章|条)")  # recognizes class-one keys
    db_cursor = conn.cursor()
    db_cursor.execute("select id, law_id, p_key, p_content from law_content_parse")
    rows = db_cursor.fetchall()
    complex_count = 0
    single_count = 0
    insert_sentence = "insert into sentences (law_id, title_id, sentence, is_single) " \
                      "value (%s, %s, %s, %s)"

    def store(law_id, title_id, payload, is_single):
        # One insert attempt, with the success / failure reporting.
        try:
            db_cursor.execute(insert_sentence, (law_id, title_id, payload, is_single))
            conn.commit()
            print(str(payload) + '-----------Success')
        except Exception as e:
            print('\033[1;32;41m' + str(payload) + ': FAILED---------' + '\033[0m')
            conn.rollback()
            print(e)

    for row in rows:
        if not key_pattern.match(row[2]):
            continue
        title_id = row[0]  # primary key of law_content_parse, stored as sentences.title_id
        law_id = row[1]    # id of the law this content belongs to
        if ':' in str(row[3]):
            complex_count += 1
            store(law_id, title_id, row[3], 0)
        else:
            single_count += 1
            for piece in str(row[3]).split('\n'):
                if piece:
                    store(law_id, title_id, piece, 1)
def forest_fire_prevention_parser(file_path):
    """Parse one law text file into ``law_content`` rows and a cleaned copy.

    The law id is looked up in ``law`` by the file's base name (the part of
    the last backslash-separated path component before the first '.').
    The file is read as GBK with undecodable bytes ignored.  Starting at the
    line that begins with '【法规全文】', every line that has more than one
    space-separated field starts a new entry: field 0 is the key, field 1
    begins the content, and the following lines with at most one field are
    appended to the content.  Each entry is appended as ``key:content`` to a
    file of the same name under ``dir_path`` and inserted into
    ``law_content`` with ``law_class`` '森林防火'.

    NOTE(review): ``cursor.fetchone()[0]`` raises TypeError when no ``law``
    row matches the file name.
    NOTE(review): the output directory is hard-coded, and the output file is
    opened without an explicit encoding.
    """
    # Hard-coded output directory for the cleaned "key:content" copies.
    dir_path = "C:\\Users\\dhz1216\\Desktop\\washing\\森林防火"
    # Only backslash is treated as the path separator.
    file_name = file_path.split("\\")[-1]
    cursor = conn.cursor()
    select_sql = "select id, name from law where text_name = %s"
    # NOTE(review): the argument is a bare string, not a 1-tuple; presumably
    # the driver accepts a scalar here -- confirm.
    cursor.execute(select_sql, (file_name.split('.')[0]))
    law_id = cursor.fetchone()[0]
    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        # readline() returns '' at end of file, which ends every loop below.
        while line:
            if line.startswith('【法规全文】'):
                with open(dir_path + "\\" + file_name, "a") as w:
                    # Drop the marker; the rest of this line is parsed as content.
                    line = line.replace('【法规全文】', '')
                    # (disabled) line = f.readline()
                    while line:
                        if len(line.lstrip().split(' ')) > 1:
                            # More than one field: this line starts a new entry.
                            key_title = line.lstrip().split(' ')[0]
                            value_content = line.lstrip().split(' ')[1]
                            line = f.readline()
                            # Lines with at most one field continue the current entry.
                            while line:
                                if len(line.lstrip().split(' ')) <= 1:
                                    value_content = value_content + line.lstrip().split(' ')[0]
                                    line = f.readline()
                                else:
                                    break
                            w.write(key_title + ':' + value_content + '\n')
                            insert_sql = "insert into law_content (law_id, p_key, p_content, law_class) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(insert_sql, (law_id, key_title, value_content, '森林防火'))
                                conn.commit()
                                print('\033[1;37;40m' + file_name + ': PARSE SUCCESS' + '\033[0m')
                            except Exception as e:
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name + ': PARSE FAILED---------' + '\033[0m')
                        else:
                            # A line with at most one field outside an entry is skipped.
                            line = f.readline()
            else:
                # Skip everything before the '【法规全文】' marker.
                line = f.readline()
def location_extract():
    """Derive each law's location from its name and store it in ``law.location``.

    The title is first checked against the city names that do not end in
    '市'; the first one contained in the title wins.  Otherwise the title is
    segmented and POS-tagged with the module-level ``segmentor`` /
    ``postagger``, and the first word tagged 'ns' plus up to two directly
    following 'ns' words form the location.  A location of one character or
    less falls back to '中华人民共和国'.

    Each update is committed on its own; a failing update is rolled back and
    reported, and processing continues.
    """
    select_sql = "select id, name from law"
    update_sql = "update law set location = %s where id = %s"
    select_special_city_sql = "select name from city where name not like '%市'"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    results = cursor.fetchall()
    cursor.execute(select_special_city_sql)
    special_city_list = [row[0] for row in cursor.fetchall()]

    for result in results:
        law_id = result[0]
        title = result[1] or ''  # a NULL name is treated as an empty title
        location = next(
            (city for city in special_city_list if city in title), '')
        if not location:
            words = list(segmentor.segment(title))
            postag = list(postagger.postag(words))
            limit = min(len(words), len(postag))
            for index in range(limit):
                if postag[index] != 'ns':
                    continue
                # Collect the run of at most three consecutive 'ns' words.
                # The bound check prevents the IndexError the unchecked
                # index + 1 / index + 2 look-ahead raised when the place
                # name was at the end of the title.
                for position in range(index, min(index + 3, limit)):
                    if postag[position] != 'ns':
                        break
                    location += words[position]
                break
        if len(location) <= 1:
            location = '中华人民共和国'
        try:
            cursor.execute(update_sql, (location, law_id))
            conn.commit()
            print(str(law_id) + title + '-----------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised TypeError.
            print('\033[1;32;41m' + str(law_id) + title + str(e) + '\033[0m')
def law_to_class_process():
    """Fill ``law.class_id`` with the id of the ``law_class`` row of the same type.

    For every law, the class whose ``type`` equals the law's ``type`` is
    looked up and its id is written to ``law.class_id``.  Laws without a
    matching class are reported and skipped.  Each update is committed on
    its own; a failing update is rolled back and reported.
    """
    cursor = conn.cursor()
    law_select = "select id, type, name from law"
    class_select_sql = "select id from law_class where type = %s"
    update_sql = "update law set class_id = %s where id = %s"
    cursor.execute(law_select)
    laws = cursor.fetchall()
    for law in laws:
        law_id = law[0]
        law_type = law[1]
        law_name = str(law[2])
        cursor.execute(class_select_sql, (law_type,))
        class_info = cursor.fetchone()
        if class_info is None:
            # No class row for this type: indexing None would raise TypeError.
            print('\033[1;32;41m' + law_name + ': PARSE FAILED---------'
                  + 'no law_class row for type ' + str(law_type) + '\033[0m')
            continue
        class_id = class_info[0]
        try:
            cursor.execute(update_sql, (class_id, law_id))
            conn.commit()
            print(law_name + '-----------------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised TypeError.
            print('\033[1;32;41m' + law_name + ': PARSE FAILED---------'
                  + str(e) + '\033[0m')
def forbid_act_save(forbid_list_1, forbid_list_2, forbid_list_3):
    """Persist three lists of forbid records into ``forbid_1`` .. ``forbid_3``.

    Records are positional sequences.  For lists 1 and 2 positions
    0, 2, 3, 4, 5 are stored as law id, chapter id, sentence id, subject and
    action; for list 3 positions 0, 2, 3, 4 are stored as law id, chapter id,
    sentence id and action.  Every record is committed and printed
    individually; errors are not caught.
    """
    insert_sql_1 = '''insert into forbid_1 (law_id, chapter_id, sentence_id, forbid_subject, forbid_action) value (%s, %s, %s, %s, %s)'''
    insert_sql_2 = '''insert into forbid_2 (law_id, chapter_id, sentence_id, forbid_subject, forbid_action) value (%s, %s, %s, %s, %s)'''
    insert_sql_3 = '''insert into forbid_3 (law_id, chapter_id, sentence_id, forbid_action) value (%s, %s, %s, %s)'''

    # (statement, records, positions of the record that feed the statement)
    batches = (
        (insert_sql_1, forbid_list_1, (0, 2, 3, 4, 5)),
        (insert_sql_2, forbid_list_2, (0, 2, 3, 4, 5)),
        (insert_sql_3, forbid_list_3, (0, 2, 3, 4)),
    )
    for statement, records, positions in batches:
        for record in records:
            params = tuple(record[position] for position in positions)
            CURSOR.execute(statement, params)
            conn.commit()
            print(record)
def _append_colon_to_sentences(cursor, select_sql, update_sql, label):
    """Append ':' to column 3 of every selected row and write it back.

    ``label`` ('-1-' or '-2-') tags the progress output so the two sentence
    tables can be told apart.  Failures are rolled back and printed.
    """
    cursor.execute(select_sql)
    for sentence in cursor.fetchall():
        sentence_id = sentence[0]
        content = sentence[3] + ':'
        try:
            cursor.execute(update_sql, (content, sentence_id))
            conn.commit()
            print(str(sentence_id), label, content, '------------------SUCCESS')
        except Exception as e:
            conn.rollback()
            print('\033[1;32;41m', str(sentence_id), label, e, '-------FAILED', '\033[0m')


def update_article():
    """Append ':' to the content of all non-single sentences of both article tables.

    Processes ``article_1_sentence`` first, prints a separator, then
    processes ``article_2_sentence``.  Only rows with ``is_single = 0`` are
    touched; every update is committed on its own.
    """
    select_article_1_sentence = '''select * from article_1_sentence where is_single = 0'''
    select_article_2_sentence = '''select * from article_2_sentence where is_single = 0'''
    update_article_1_sentence = '''update article_1_sentence set content = %s where id = %s'''
    update_article_2_sentence = '''update article_2_sentence set content = %s where id = %s'''
    cursor = conn.cursor()
    # The failure message for article_1 used to be tagged '-2-' (copy-paste
    # slip); it now carries the same '-1-' tag as the success message.
    _append_colon_to_sentences(
        cursor, select_article_1_sentence, update_article_1_sentence, '-1-')
    print(
        '\n',
        '=============================================================================================',
        '\n')
    _append_colon_to_sentences(
        cursor, select_article_2_sentence, update_article_2_sentence, '-2-')
def save_relation(relation_list, law_id, content_class, chapter_id, sentence_id):
    """Insert extracted (subject, relation, object) triples into ``extract_relation``.

    Each item of ``relation_list`` is indexed 0..2 as subject, relation name
    and object.  ``is_contain`` is 1 when the object equals the placeholder
    '根据章节条款信息补全list', else 0.  Every triple is committed on its own;
    a failing insert is rolled back and reported, and processing continues.
    """
    cursor = conn.cursor()
    insert_sql = '''insert into extract_relation (law_id, class, chapter_id, sentence_id, is_contain, subject, relation, object) value (%s, %s, %s, %s, %s, %s, %s, %s)'''
    for relation in relation_list:
        subject = relation[0]
        relation_name = relation[1]
        relation_object = relation[2]  # renamed: `object` shadowed the builtin
        is_contain = 1 if relation_object == '根据章节条款信息补全list' else 0
        try:
            cursor.execute(insert_sql,
                           (law_id, content_class, chapter_id, sentence_id,
                            is_contain, subject, relation_name, relation_object))
            conn.commit()
            print(subject, relation_name, relation_object, '--------saved--------')
        except Exception as e:
            conn.rollback()
            # str(...): neither the relation sequence nor the exception can
            # be concatenated to a str; doing so raised TypeError here.
            print('\033[1;32;41m' + str(relation) + str(e)
                  + ': FAILED---------' + '\033[0m')
def merge_forbid_action():
    """Copy rows from ``forbid_2`` and ``forbid_3`` into ``forbid_action``.

    From ``forbid_2`` the grouped (law_id, chapter_id, sentence_id,
    forbid_subject) tuples are taken, with ``forbid_subject`` stored in the
    ``forbid_action`` column; from ``forbid_3`` the ``forbid_action`` column
    itself is used.  Every insert is committed on its own.
    """
    select_fobid_2 = '''select law_id, chapter_id, sentence_id, forbid_subject from forbid_2 GROUP BY law_id, forbid_subject, chapter_id, sentence_id'''
    select_forbid_3 = '''select law_id, chapter_id, sentence_id, forbid_action from forbid_3'''
    insert_sql = '''insert into forbid_action (law_id, chapter_id, sentence_id, forbid_action) value (%s, %s, %s, %s)'''

    for source_query in (select_fobid_2, select_forbid_3):
        CURSOR.execute(source_query)
        source_rows = CURSOR.fetchall()
        for row in source_rows:
            CURSOR.execute(insert_sql, (row[0], row[1], row[2], row[3]))
            conn.commit()
def init_relation_collection(filter_colum, key, relation_type, num):
    """Sample semantic-role-label results into ``relation_classify``.

    Selects rows of ``semantic_role_label_result`` whose ``filter_colum``
    column is LIKE ``%key%`` and whose ``parse_sentence`` is not LIKE
    '%所有权%', grouped by ``parse_sentence``.  Sentences that contain a digit
    or start with one of the characters in ``head_reg`` are skipped.  Of the
    remaining sentences every ``num``-th one is inserted with the given
    ``relation_type``.  The number of remaining sentences is printed at the
    end.
    """
    db_cursor = conn.cursor()
    like_pattern = '%' + key + '%'
    select_srl_results = (
        "select * from semantic_role_label_result where %s" % filter_colum
        + " like %s and parse_sentence not like %s group by parse_sentence"
    )
    insert_relation_classify = '''insert into relation_classify (law_id, class, chapter_id, sentence_id, complete_sentence, parse_sentence, relation_type, is_complex) value (%s, %s, %s, %s, %s, %s, %s, %s)'''
    num_reg = '[0-9]+'
    head_reg = '^[一二三四五六七八(1234567890]'

    db_cursor.execute(select_srl_results, (like_pattern, '%所有权%'))
    candidates = db_cursor.fetchall()

    kept = 0
    for row in candidates:
        text = str(row[6]).strip()
        if re.search(num_reg, text) or re.search(head_reg, text):
            continue
        kept += 1
        if kept % num != 0:
            continue
        # Columns 1..5 and 10 of the source row feed the insert.
        db_cursor.execute(
            insert_relation_classify,
            (row[1], row[2], row[3], row[4], row[5], text, relation_type, row[10]))
        conn.commit()
        print(relation_type, ' insert success')
    print(kept)
def article_1_sentence_extract():
    """Split ``article_1`` contents into sentences and clauses.

    Content without ':' is stored as one single sentence (``is_single = 1``).
    Content with ':' is split on ':': the first part (spaces removed) becomes
    a non-single sentence in ``article_1_sentence``, and every non-empty line
    of the second part (spaces removed) becomes a row in
    ``article_1_clause`` linked to that sentence.

    Failures are rolled back and reported.  When the sentence insert fails,
    its clauses are skipped; otherwise they would be attached to the newest
    existing sentence, which belongs to a different article.
    """
    select_sql = "select * from article_1"
    insert_article_1_sentence_sql = '''insert into article_1_sentence (article_1_id, is_single, content) value (%s, %s, %s)'''
    insert_article_1_clause_sql = '''insert into article_1_clause (article_1_id, article_1_sentence_id, clause_content) value (%s, %s, %s)'''
    select_article1_sentence_id = '''SELECT id from article_1_sentence where id = (SELECT max(id) FROM article_1_sentence);'''
    cursor = conn.cursor()
    cursor.execute(select_sql)
    articles = cursor.fetchall()
    for article in articles:
        article_1_id = article[0]
        article_1_content = article[2]

        if ':' not in article_1_content:
            try:
                cursor.execute(insert_article_1_sentence_sql,
                               (article_1_id, 1, article_1_content))
                conn.commit()
                print(article_1_content
                      + '=========================================SUCCESS')
            except Exception as e:
                conn.rollback()
                # str(e): concatenating the exception raised TypeError.
                print('\033[1;32;41m' + str(article_1_id) + '--' + str(e)
                      + ': FAILED---------' + '\033[0m')
            continue

        parts = str(article_1_content).split(':')
        article_1_sentence_content = parts[0].replace(" ", "")
        try:
            cursor.execute(insert_article_1_sentence_sql,
                           (article_1_id, 0, article_1_sentence_content))
            conn.commit()
        except Exception as e:
            conn.rollback()
            print('\033[1;32;41m' + str(article_1_id)
                  + article_1_sentence_content + str(e)
                  + ': FAILED---------' + '\033[0m')
            # Without a stored sentence there is nothing to link clauses to.
            continue

        # The sentence just inserted is taken to be the row with the highest id.
        cursor.execute(select_article1_sentence_id)
        sentence_id = cursor.fetchone()[0]
        for article_1_clause in parts[1].split("\n"):
            if not article_1_clause:
                continue
            try:
                cursor.execute(insert_article_1_clause_sql,
                               (article_1_id, sentence_id,
                                str(article_1_clause).replace(" ", "")))
                conn.commit()
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(article_1_id) + article_1_clause
                      + str(e) + ': FAILED---------' + '\033[0m')
        print(article[2] + '============================================SUCCESS')
def law_province_code_update():
    """Derive ``law.province_code`` from ``law.location_code``.

    The province code is the first two characters of the location code
    followed by '0000'; laws without a location code get '000000'.  Each
    update is committed on its own; a failing update is rolled back and
    reported, and processing continues.
    """
    select_sql = "select id, name, location, location_code from law"
    update_sql = "update law set province_code = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    laws = cursor.fetchall()
    for law in laws:
        law_id = law[0]
        law_name = str(law[1])
        law_location_code = law[3]
        if law_location_code is not None:
            province_code = str(law_location_code)[0:2] + '0000'
        else:
            province_code = '000000'
        try:
            cursor.execute(update_sql, (province_code, law_id))
            conn.commit()
            print(law_name + '-------------------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised TypeError.
            print('\033[1;32;41m' + law_name + ': FAILED---------'
                  + str(e) + '\033[0m')
def _strip_trailing_punctuation(text):
    """Drop one trailing ',' or '。' from *text*; empty/None is returned as is."""
    # NOTE(review): the original compared against ',' twice; presumably one
    # of the two was meant to be the full-width comma -- confirm before
    # widening the set.
    if text and text.endswith((',', '。')):
        return text[:-1]
    return text


def entity_wash():
    """Clean the subject and object columns of every ``<class>_relation`` table.

    For each class in ``SINGLE_RELATION_CLASS``, every row's subject
    (column 4) is replaced by its first run of Chinese characters /
    punctuation when that run starts at offset 1..3, i.e. when a short
    non-Chinese prefix precedes it.  One trailing ',' or '。' is then removed
    from both subject and object (column 6), and the row is updated,
    committed and printed.

    Empty or NULL subjects/objects are written back unchanged instead of
    raising IndexError / TypeError.
    """
    cursor = conn.cursor()
    chinese_pattern = "[\\u4e00-\\u9fa5\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]+"
    for class_type in SINGLE_RELATION_CLASS:
        table_name = class_type + '_relation'
        select_sql = 'select * from %s' % table_name
        update_sql = 'update %s ' % table_name + 'set subject = %s, object = %s where id = %s'
        cursor.execute(select_sql)
        results = cursor.fetchall()
        for relation in results:
            relation_id = relation[0]
            subject = relation[4]
            relation_object = relation[6]
            if subject:
                cn_pattern_res = re.search(chinese_pattern, subject, re.M | re.I)
                if cn_pattern_res and 0 < cn_pattern_res.start() < 4:
                    subject = cn_pattern_res.group(0)
            subject = _strip_trailing_punctuation(subject)
            relation_object = _strip_trailing_punctuation(relation_object)
            cursor.execute(update_sql, (subject, relation_object, relation_id))
            conn.commit()
            print(class_type, relation_id, subject, relation_object)
def subject_type_classify():
    """Classify every ``new_forestry_subject_final`` row by the suffix of its subject.

    A subject ending in one of the organisation words becomes 'ORG'; failing
    that, one ending in one of the park words becomes 'PARK'; anything else
    becomes 'FORESTRY'.  The organisation check takes precedence, matching
    the order in which the suffix lists were originally applied.  Each row
    is updated, committed and printed; errors are not caught.
    """
    # Tuples so they can be passed straight to str.endswith().
    park_words = (
        '公园', '保护区', '区域', '名胜区', '风景区', '景区', '地区', '区', '湿地',
        '范围', '基地', '山', '湖', '景观'
    )
    org_words = (
        '政府', '机构', '部门', '企业', '局', '厅', '单位', '指挥部', '机关',
        '管委会', '委员会', '公司', '站', '部', '委', '办'
    )
    cursor = conn.cursor()
    select_sql = '''select * from new_forestry_subject_final'''
    update_sql = '''update new_forestry_subject_final set subject_type = %s where id = %s'''
    cursor.execute(select_sql)
    results = cursor.fetchall()
    for res in results:
        subject_id = res[0]
        subject = res[1]
        subject_text = str(subject)
        if subject_text.endswith(org_words):
            subject_type = 'ORG'
        elif subject_text.endswith(park_words):
            subject_type = 'PARK'
        else:
            subject_type = 'FORESTRY'
        cursor.execute(update_sql, (subject_type, subject_id))
        conn.commit()
        print(subject, 'update success!')
def complex_main_sentence_analysis():
    """Run dependency parsing and semantic role labelling on non-single sentences.

    For every ``sentences`` row with ``is_single = 0`` the text before the
    first ':' (plus ':') is segmented, POS-tagged, parsed and role-labelled
    with the module-level ``segmentor``, ``postagger``, ``parser`` and
    ``labeller``.  Three kinds of rows are stored:

    * ``role_label`` -- one row per argument of every labelled role;
    * ``verb``       -- one row per word tagged 'v', flagged ``is_core`` when
      it is the predicate of a role, and later ``is_head`` when it is the
      target of the 'HED' arc;
    * ``words``      -- one row per dependency arc that touches a core verb.

    Every statement is committed on its own; failures are rolled back and
    reported, and processing continues.  The total run time is printed at
    the end.
    """
    start_time = time.time()
    select_sql = '''select * from sentences where is_single = 0'''
    insert_role_label_sql = '''insert into role_label (sentence_id, arg_name, arg_start, arg_end, core_verb_index) value (%s, %s, %s, %s, %s)'''
    insert_verb_sql = '''insert into verb (sentence_id, part_of_speech, loc_index, is_core) value (%s, %s, %s, %s)'''
    update_verb_sql = '''update verb set is_head = 1 where sentence_id = %s and loc_index = %s'''
    insert_words_sql = '''insert into words (sentence_id, part_of_speech, core_verb_index, relation, word, head_or_tail) value (%s, %s, %s, %s, %s, %s)'''
    cursor = conn.cursor()
    cursor.execute(select_sql)
    complex_sentences = cursor.fetchall()
    for sentence in complex_sentences:
        sentence_id = sentence[0]
        main_sentence = str(sentence[3]).strip().split(':')[0] + ':'
        origin_words = list(segmentor.segment(main_sentence))  # word segmentation
        origin_postags = list(postagger.postag(origin_words))  # POS tagging
        arcs = parser.parse(origin_words, origin_postags)  # dependency parsing
        roles = labeller.label(origin_words, origin_postags, arcs)  # semantic role labelling
        print('语义角色标注--------', str(len(roles)))

        # Store the semantic role labels and remember the predicate indexes.
        core_verb_list = list()
        for role in roles:
            core_verb_list.append(role.index)
            for arg in role.arguments:
                arg_name = arg.name
                arg_start = arg.range.start
                arg_end = arg.range.end
                try:
                    cursor.execute(insert_role_label_sql,
                                   (sentence_id, arg_name, arg_start, arg_end,
                                    role.index))
                    conn.commit()
                    print(str(sentence_id), main_sentence, '-----------',
                          origin_words[role.index], arg_name,
                          '-------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    # str(e): concatenating the exception raised TypeError.
                    print('\033[1;32;41m' + str(sentence_id) + main_sentence
                          + str(e) + ': ---------FAILED---------' + '\033[0m')

        # Store every verb, flagging the ones that are role predicates.
        print('提取动词信息-------------------------------------------')
        for index in range(len(origin_words)):
            if origin_postags[index] != 'v':
                continue
            is_core = 1 if index in core_verb_list else 0
            try:
                cursor.execute(insert_verb_sql, (sentence_id, 'v', index, is_core))
                conn.commit()
                print(str(index), '--', origin_words[index],
                      '-----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(index) + origin_words[index]
                      + str(e) + ': ---------FAILED---------' + '\033[0m')

        # Store the arcs between core verbs and other words.
        arc_head = [a.head for a in arcs]
        arc_relation = [a.relation for a in arcs]
        # Both lists are shifted by one so that arc head values index them
        # directly, with position 0 standing for the root.
        tree_node_list = ['ROOT'] + origin_words
        postags = ['NONE'] + origin_postags
        for i in range(len(arc_head)):
            j = arc_head[i]
            head_index = j - 1
            tail_index = i
            relation = arc_relation[i]
            if arc_relation[i] == 'HED':
                # The target of the HED arc is the head verb of the sentence.
                print('更新根动词情况:')
                try:
                    cursor.execute(update_verb_sql, (sentence_id, i))
                    conn.commit()
                    print('根动词-----index: ', str(i), '----', origin_words[i],
                          '-----UPDATE SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + '根动词---' + str(i)
                          + origin_words[i] + str(e)
                          + ': ---------FAILED---------' + '\033[0m')
                continue
            if head_index not in core_verb_list and tail_index not in core_verb_list:
                continue
            elif head_index in core_verb_list:
                # The core verb is the head; record the dependent word.
                part_of_speech = postags[i + 1]
                core_verb_index = head_index
                word = origin_words[i]
                loc = 'tail'
            else:
                # The core verb is the dependent; record its head word.
                part_of_speech = postags[j]
                core_verb_index = tail_index
                word = tree_node_list[j]
                loc = 'head'
            try:
                cursor.execute(insert_words_sql,
                               (sentence_id, part_of_speech, core_verb_index,
                                relation, word, loc))
                conn.commit()
                print(tree_node_list[j], postags[j], '----', origin_words[i],
                      postags[i + 1], relation, '-----SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m', tree_node_list[j], postags[j], '----',
                      origin_words[i], postags[i + 1], relation, '-----FAILED',
                      e, '\033[0m')
        print(
            '\n',
            '===============================================================',
            '\n')
    end_time = time.time()
    print('处理', str(len(complex_sentences)), '条数据的总耗时为:',
          str(end_time - start_time), 's')
def chapter_article_process():
    """Split ``law_content_parse`` into chapter / article_1 / article_2 rows (method 1).

    The original author's comment marks this first method as problematic.
    Rows are walked in query order.  A row whose key matches ``第...章`` is
    stored in ``chapter``; the rows that follow it and match ``第...条`` are
    stored in ``article_1`` under that chapter, up to the first row that
    does not match.  A row reached outside a chapter run and not matching
    ``第...章`` is stored in ``article_2`` with its law id.

    Processing stops at the first failing insert, which is rolled back and
    reported.
    """
    select_sql = "select * from law_content_parse"
    insert_chapter_sql = "insert into chapter (chapter_key, chapter_name, law_id) value (%s, %s, %s)"
    insert_article1_sql = "insert into article_1 (a_key, a_content, chapter_id) value (%s, %s, %s)"
    insert_article2_sql = "insert into article_2 (a_key, a_content, law_id) value (%s, %s, %s)"
    select_chapter_sql = 'SELECT id from chapter where id = (SELECT max(id) FROM chapter);'
    # Compiled once instead of on every loop iteration.
    pattern_chapter = re.compile("第(.*?)章")
    pattern_article = re.compile("第(.*?)条")
    cursor = conn.cursor()
    cursor.execute(select_sql)
    contents = cursor.fetchall()

    def insert_row(sql, params, row, failure_label):
        # Run one insert; return False (after reporting) when it fails.
        try:
            cursor.execute(sql, params)
            conn.commit()
            print(row[5] + '----' + row[2] + '----------------SUCCESS')
            return True
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception raised TypeError, so the
            # caller's early return was never reached.
            print('\033[1;32;41m' + str(row[5]) + str(e) + failure_label + '\033[0m')
            return False

    index = 0
    while index < len(contents):
        row = contents[index]
        if pattern_chapter.match(row[2]):
            if not insert_row(insert_chapter_sql, (row[2], row[3], row[1]),
                              row, ': CHAPTER FAILED---------'):
                return
            index = index + 1
            # TODO: detect here whether the next law has already been reached.
            # The chapter just inserted is taken to be the row with the
            # highest id; this is fixed for the whole run of articles.
            cursor.execute(select_chapter_sql)
            chapter_id = cursor.fetchone()[0]
            while index < len(contents):
                row = contents[index]
                if not pattern_article.match(row[2]):
                    print('-----------------------------' + row[5]
                          + '----------------------------------')
                    break
                if not insert_row(insert_article1_sql,
                                  (row[2], row[3], chapter_id),
                                  row, ': ARTICLE FAILED---------'):
                    return
                index = index + 1
        else:
            if not insert_row(insert_article2_sql, (row[2], row[3], row[1]),
                              row, ': ARTICLE FAILED---------'):
                return
            index = index + 1
def chapter_article_process_2():
    """Split ``law_content_parse`` into chapter / article_1 / article_2 rows (method 2).

    The distinct law ids are collected first and each law's rows are then
    processed separately.  Within a law, a row whose key matches ``第...章``
    is stored in ``chapter``; the rows that follow it and match ``第...条``
    are stored in ``article_1`` under that chapter, up to the first row that
    does not match.  A row reached outside a chapter run and not matching
    ``第...章`` is stored in ``article_2`` with the law id.

    Processing of all laws stops at the first failing insert, which is
    rolled back and reported.
    """
    cursor = conn.cursor()
    pattern_chapter = re.compile("第(.*?)章")
    pattern_article = re.compile("第(.*?)条")
    select_law_id_sql = "select law_id from law_content_parse group by law_id"
    select_law_content_sql = "select * from law_content_parse where law_id = %s"
    insert_chapter_sql = "insert into chapter (chapter_key, chapter_name, law_id) value (%s, %s, %s)"
    insert_article1_sql = "insert into article_1 (a_key, a_content, chapter_id) value (%s, %s, %s)"
    insert_article2_sql = "insert into article_2 (a_key, a_content, law_id) value (%s, %s, %s)"
    select_chapter_sql = 'SELECT id from chapter where id = (SELECT max(id) FROM chapter);'

    def insert_row(sql, params, row, failure_label):
        # Run one insert; return False (after reporting) when it fails.
        try:
            cursor.execute(sql, params)
            conn.commit()
            print(row[5] + '----' + row[2] + '----------------SUCCESS')
            return True
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception raised TypeError, so the
            # caller's early return was never reached.
            print('\033[1;32;41m' + str(row[5]) + str(e) + failure_label + '\033[0m')
            return False

    # Collect the law ids first, then process the content of one law at a time.
    cursor.execute(select_law_id_sql)
    law_id_list = [law[0] for law in cursor.fetchall()]

    for law_id in law_id_list:
        cursor.execute(select_law_content_sql, (law_id, ))
        contents = cursor.fetchall()
        index = 0
        while index < len(contents):
            row = contents[index]
            if pattern_chapter.match(row[2]):
                if not insert_row(insert_chapter_sql, (row[2], row[3], law_id),
                                  row, ': CHAPTER FAILED---------'):
                    return
                index = index + 1
                # The chapter just inserted is taken to be the row with the
                # highest id; this is fixed for the whole run of articles.
                cursor.execute(select_chapter_sql)
                chapter_id = cursor.fetchone()[0]
                while index < len(contents):
                    row = contents[index]
                    if not pattern_article.match(row[2]):
                        print('-----------------------------' + row[5]
                              + '----------------------------------')
                        break
                    if not insert_row(insert_article1_sql,
                                      (row[2], row[3], chapter_id),
                                      row, ': ARTICLE FAILED---------'):
                        return
                    index = index + 1
            else:
                if not insert_row(insert_article2_sql, (row[2], row[3], law_id),
                                  row, ': ARTICLE FAILED---------'):
                    return
                index = index + 1