def spot_check_execute(self, pageSoup):
    """Parse the spot-check section of a page into a list of records.

    Delegates to ``common.get_dict_list`` with the column keys and the
    section keyword for this table.

    Args:
        pageSoup: Parsed page handed straight to ``common.get_dict_list``
            (presumably a BeautifulSoup object -- confirm against caller).

    Returns:
        Whatever ``common.get_dict_list`` returns for this section
        (presumably one dict per table row, keyed by the column keys).
    """
    column_keys = [
        'xuhao',
        'authority',
        'spot_type',
        'spot_date',
        'spot_result',
    ]
    section_keywords = ["chouchaxinxi"]
    return common.get_dict_list(pageSoup, column_keys, section_keywords)
def black_info_execute(self, pageSoup):
    """Parse the blacklist section of a page into a list of records.

    Delegates to ``common.get_dict_list`` with the column keys and the
    section keyword for this table.

    Args:
        pageSoup: Parsed page handed straight to ``common.get_dict_list``
            (presumably a BeautifulSoup object -- confirm against caller).

    Returns:
        Whatever ``common.get_dict_list`` returns for this section
        (presumably one dict per table row, keyed by the column keys).
    """
    section_keywords = ["yanzhongweifaqiye"]
    column_keys = [
        'xuhao',
        'reason_in',
        'date_in',
        'reason_out',
        'date_out',
        'authority',
        'xiangqing',
    ]
    return common.get_dict_list(pageSoup, column_keys, section_keywords)
def stock_freeze_execute(self, pageSoup):
    """Parse the equity-freeze section of a page into a list of records.

    Delegates to ``common.get_dict_list`` with the column keys and the
    section keyword for this table.

    Args:
        pageSoup: Parsed page handed straight to ``common.get_dict_list``
            (presumably a BeautifulSoup object -- confirm against caller).

    Returns:
        Whatever ``common.get_dict_list`` returns for this section
        (presumably one dict per table row, keyed by the column keys).
    """
    column_keys = [
        'xuhao',
        'person',
        'stock',
        'court',
        'notice_number',
        'statues',  # spelling is part of the runtime key; keep as-is
        'xiangqing',
    ]
    section_keywords = ["EquityFreezeDiv"]
    return common.get_dict_list(pageSoup, column_keys, section_keywords)
def stockholder_change_execute(self, pageSoup):
    """Parse the stockholder-change section and insist that it is empty.

    Delegates to ``common.get_dict_list``; any result other than an empty
    list is treated as an error.

    Args:
        pageSoup: Parsed page handed straight to ``common.get_dict_list``
            (presumably a BeautifulSoup object -- confirm against caller).

    Returns:
        The empty list returned by ``common.get_dict_list``.

    Raises:
        ValueError: If the parsed result is not equal to ``[]``.
    """
    # NOTE(review): "xzcfDiv" looks like it may belong to a different
    # section than stockholder changes -- confirm against the page markup.
    section_keywords = ["xzcfDiv"]
    column_keys = [
        'xuhao',
        'person',
        'stock',
        'person_get',
        'court',
        'xiangqing',
    ]
    records = common.get_dict_list(pageSoup, column_keys, section_keywords)

    # Guard clause: rows in this section are reported loudly rather than
    # returned.
    if records != []:
        raise ValueError("dict_ba_list is not empty.")
    return records
def mul_page(pageSoup, page_tag, url_page_part, key_list, dlink, idname, glb_id):
    """Fetch every page of a paginated table and concatenate the parsed rows.

    The page count is inferred from the pagination elements present in the
    initial ``pageSoup``: ids ``page_tag + "2"``, ``page_tag + "3"``, ... are
    probed in order until one is missing. Only the initial soup is inspected,
    so page links that are not rendered there cannot be discovered.

    Args:
        pageSoup: Parsed first page; only ``pageSoup.find(id=...)`` is used.
        page_tag: Prefix of the pagination element ids.
        url_page_part: Path appended to the site root to build the request URL.
        key_list: Column keys forwarded to ``common.get_dict_list``.
        dlink: Detail link. When ``idname`` is non-empty and ``dlink``
            contains digits, its first run of digits replaces the record id.
        idname: Name of the request field that carries the record id (used
            for the mortgaged-property detail pages, whose key is not
            ``'mainId'``). An empty string selects ``'mainId'``.
        glb_id: String whose part after the last ``"="`` is the default
            record id.

    Returns:
        list: The rows returned by ``common.get_dict_list`` for each page
        that ``get_html`` fetched successfully (did not return ``None``),
        in page order.
    """
    url_home = "http://gxqyxygs.gov.cn/"
    probe_stop = 100  # pagination ids 2 .. probe_stop - 1 are probed

    # page_limit is the exclusive upper bound for range(): one more than the
    # highest page number. It starts at probe_stop so that a soup containing
    # every probed tag still yields all of those pages instead of only page 1.
    page_limit = probe_stop
    for page_no in range(2, probe_stop):
        if pageSoup.find(id=page_tag + str(page_no)) is None:
            page_limit = page_no  # highest existing page is page_no - 1
            break

    # Work out the id field and value once; neither depends on the page number.
    record_id = glb_id.split("=")[-1]
    if idname != "":
        # Pagination of the mortgaged-property detail pages.
        id_field = idname
        if dlink != "":
            digit_runs = re.findall("[0-9]+", dlink)
            if digit_runs:
                record_id = digit_runs[0]
    else:
        id_field = 'mainId'

    url = url_home + url_page_part
    rows = []
    for page_no in range(1, page_limit):
        # 'pno' is the page number expected by the paging endpoint.
        page = get_html(url, {'pno': str(page_no), id_field: record_id})
        if page is not None:
            rows.extend(common.get_dict_list(page, key_list, [""]))
    return rows