def download_new_testcases(): db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666') case_list = db_sc_cases.get(StaticUtils.case_table, 'name, doc_id, court, YEAR(DATE)', 'download=\'no\'') total = len(case_list) i = 0 for case in case_list: case_name, case_doc_id, case_court, case_year = case print(case_name, case_doc_id, case_court, case_year) try: case_text = download_case(case_doc_id) except Exception as e: print(e) db_sc_cases.commit() print("Sleep 2s ...") time.sleep(2) if case_text: verdict = VerdictAnalyser.VerdictAnalyser(case_text) print(f"{i}/{total} case {case_name} is downloaded.") db_sc_cases.update(StaticUtils.case_table, 'download', '\'yes\'', f'doc_id=\'{case_doc_id}\'') db_sc_cases.update(StaticUtils.case_table, 'content', f'\'{verdict.content}\'', f'doc_id=\'{case_doc_id}\'') db_sc_cases.commit() else: db_sc_cases.update(StaticUtils.case_table, 'download', '\'empty\'', f'doc_id=\'{case_doc_id}\'') print(f"{i}/{total} case {case_name} is empty.") i += 1 db_sc_cases.commit() db_sc_cases.close()
def main(): db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666') t0 = time.time() cases = analyse_case(db_sc_cases) t1 = time.time() update_to_db(db_sc_cases, cases) t2 = time.time() db_sc_cases.commit() t3 = time.time() print(t1 - t0, t2 - t1, t3 - t2) db_sc_cases.close()
def download_case_list_by_upload_period(year, start_date, end_date): search_criteria = "案件类型:刑事案件,审判程序:一审,法院地域:四川省,裁判年份:{},文书类型:判决书,上传日期:{} TO {}".format(year, start_date, end_date) total_number = get_total_number(search_criteria) if int(total_number) == 0: return None max_page = int(total_number) // 20 if int(total_number) % 20 == 0 else (int(total_number) // 20) + 1 cases = download_all_caselist(search_criteria, max_page) db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666') length = len(cases['name']) for i in range(length): data = dict() for key in cases: if key == 'procedure': data['trial'] = cases[key][i] else: data[key] = cases[key][i] data['download'] = 'no' data['upload_date'] = start_date doc_id = db_sc_cases.get(StaticUtils.case_table, 'doc_id', 'doc_id=\'{}\''.format(data['doc_id'])) if not doc_id: fields_list = ["name", "doc_id", "date", "case_id", "trial", "court", "download", "upload_date"] values = '' for key in fields_list: # transfer to str if it's a int if isinstance(data[key], int): values = values + str(data[key]) else: if values: values = values + ",\'" + data[key] + "\'" else: values = "(\'" + data[key] + "\'" values = values + ")" fields = ','.join(fields_list) db_sc_cases.insert(CASE_TABLE, fields, values) # db_sc_cases.insert(StaticUtils.case_table, data) db_sc_cases.commit() db_sc_cases.close()
def get_latest_upload_date(): db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666') result = db_sc_cases.get_max_record(StaticUtils.case_table, 'upload_date') db_sc_cases.close() return result[0]