def main(**kwargs):
    url = kwargs.get('id_tag')
    global proxies
    proxies = kwargs.get('proxies')
    headers = {
        'Host': 'sh.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36'),
        'Referer': 'http://sh.gsxt.gov.cn/notice/search/ent_info_list'
    }
    resp = url_requests.get(url, headers=headers, proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    # A very short response is a redirect stub; the real entity URL sits
    # inside its only <script> tag.
    if len(resp.content) < 1000:
        url = page_soup.find('script').text.split("'")[1]
        resp = url_requests.get(url, headers=headers, proxies=proxies)
        page_soup = BeautifulSoup(resp.content, 'lxml')
    all_execute = [
        'basicinfo_execute', 's_h_execute', 'b_c_execute',
        'member_execute', 'branch_execute', 'adm_punishment_execute',
        'abnormal_execute', 'mortgage_basic_execute', 'pledge_execute',
        'black_info_execute', 'spot_check_execute',
        'stock_freeze_execute', 'stockholder_change_execute',
    ]
    mortgage_execute = [
        'c_mortgage_execute', 's_creditor_execute', 'mortgage_execute'
    ]
    credit = CompanyInfo(url)  # instantiate the parser
    for each in all_execute:
        print "Executing: %s" % each
        getattr(credit, each)(page_soup)
    # Each mortgage summary row links to a detail page, which the three
    # mortgage parsers then consume.
    if credit.qyxx_mortgage_basic:
        for each in credit.qyxx_mortgage_basic:
            url = ('http://sh.gsxt.gov.cn/notice/notice/view_mortage?uuid'
                   '=') + each['xiangqing'].split("'")[1]
            resp = url_requests.get(url, headers=headers, proxies=proxies)
            mortgage_soup = BeautifulSoup(resp.content, 'lxml')
            for each_mort in mortgage_execute:
                print "Executing: %s" % each_mort
                getattr(credit, each_mort)(mortgage_soup)
    results = credit.returnData()
    return results

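# A minimal invocation sketch, not from the original source: the entity
# URL and the proxy address below are hypothetical placeholders.
if __name__ == '__main__':
    data = main(
        id_tag='http://sh.gsxt.gov.cn/notice/notice/view?uuid=EXAMPLE',
        proxies={'http': 'http://127.0.0.1:8888'},
    )
    print data
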
def get_raw_img(self):
    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.71 Safari/537.36')
    }
    r_bg = url_requests.get(self.bg_url, headers=headers, proxies=proxies)
    r_fullbg = url_requests.get(self.fullbg_url, headers=headers,
                                proxies=proxies)
    raw_chunk_img = BytesIO(r_bg.content)
    raw_source_img = BytesIO(r_fullbg.content)
    return raw_source_img, raw_chunk_img

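# A sketch of consuming the two buffers with PIL; the `slider` instance
# name and this call site are assumptions, not part of the original source.
from PIL import Image

raw_source_img, raw_chunk_img = slider.get_raw_img()
fullbg = Image.open(raw_source_img)  # unobstructed background image
bg = Image.open(raw_chunk_img)       # background with the puzzle gap
print fullbg.size, bg.size
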
def branch_execute(self, **kwargs):
    all_url = kwargs.get('all_url')
    keyword = 'branchUrlAll'
    url = None
    for each in all_url:
        if keyword in each:
            url = 'http://www.gsxt.gov.cn' + each.split('"')[1].strip()
            break
    if url:
        headers = get_headers()
        response = url_requests.get(url=url, headers=headers,
                                    proxies=proxies)
        # The frame page embeds the actual data URL in a JS variable.
        url = 'http://www.gsxt.gov.cn' + re.search(
            'branchUrlData ="(.*?)"', response.content, re.S).group(1)
        headers = get_headers()
        response = url_requests.get(url=url, headers=headers,
                                    proxies=proxies)
        json_data = json.loads(response.content)
        all_data = json_data.get('data')
        total_page = json_data.get('totalPage')
        per_page = json_data.get('perPage')  # entries per page
        if total_page > 1:
            for page in range(1, total_page):
                form_data = {'start': page * per_page}
                headers = get_headers()
                response = url_requests.get(url=url, params=form_data,
                                            headers=headers,
                                            proxies=proxies)
                json_data = json.loads(response.content)
                data = json_data.get('data')
                all_data.extend(data)
        result_list = []
        for each in all_data:
            item = {}
            item['company_num'] = each.get('regNo', '')
            item['company_name'] = each.get('brName', '')
            item['authority'] = each.get('regOrg_CN', '')
            result_list.append(item)
        self.qyxx_branch = result_list

def member_execute(self, **kwargs):
    all_url = kwargs.get('all_url')
    keyword = 'keyPersonUrl'
    # Sole proprietorships (个体工商户) use a different endpoint suffix.
    if '个体工商户' in self.qyxx_basicinfo[0]['company_type']:
        keyword = 'gtkeyPersonUrl'
    url = None
    for each in all_url:
        if keyword in each:
            url = 'http://www.gsxt.gov.cn' + each.split('"')[1].strip()
            break
    if url:
        headers = get_headers()
        response = url_requests.get(url=url, headers=headers,
                                    proxies=proxies)
        json_data = json.loads(response.content)
        all_data = json_data.get('data')
        total_page = json_data.get('totalPage')
        per_page = json_data.get('perPage')  # entries per page
        if total_page > 1:
            for page in range(1, total_page):
                form_data = {'start': page * per_page}
                headers = get_headers()
                response = url_requests.get(url=url, params=form_data,
                                            headers=headers,
                                            proxies=proxies)
                json_data = json.loads(response.content)
                data = json_data.get('data')
                all_data.extend(data)
        result_list = []
        for each in all_data:
            item = {}
            name = each.get('name', '')
            if name:
                # The name arrives wrapped in an HTML tag with a
                # base64-encoded payload; strip the tag and decode.
                name = base64.b64decode(name.split('>')[1].split('<')[0])
            item['person_name'] = name
            item['p_position'] = each.get('position_CN', '').strip()
            result_list.append(item)
        self.qyxx_member = result_list

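# A standalone sketch of the name-decoding transform above; the sample
# markup and payload are made up ('5byg5LiJ' is base64 for 张三 in UTF-8).
import base64

raw = '<span class="underline">5byg5LiJ</span>'
decoded = base64.b64decode(raw.split('>')[1].split('<')[0])
print decoded  # -> '\xe5\xbc\xa0\xe4\xb8\x89', i.e. 张三 as UTF-8 bytes
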
def stockholder_change_execute(self, page_soup):
    url = ('http://gx.gsxt.gov.cn/newChange/'
           'newChangeAction!getTabForNB_new.dhtml')
    self.params["urltag"] = '15'
    self.params["flag_num"] = '2'
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    key_list = [
        'xuhao', 'person', 'stock', 'person_get', 'court', 'detail'
    ]
    soup = page_soup.find('div', {'id': 'layout-06_02_01'})
    if soup:
        info = CreditInfo.parse(soup, 'table', {'class': 'tableG'},
                                key_list=key_list)
        self.qyxx_stockholder_change.extend(info)

def spot_check_execute(self, page_soup):
    url = 'http://gx.gsxt.gov.cn/gsgs/gsxzcfAction!xj_list_ccjcxx.dhtml'
    self.params["urltag"] = '10'
    self.params["ent_id"] = self.params["entId"]
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    soup = page_soup.find('div', {'class': 'qyqx-detail'})
    if soup:
        key_list = [
            'xuhao', 'authority', 'spot_type', 'spot_date', 'spot_result'
        ]
        info = CreditInfo.parse(soup, 'table', {'class': 'table-result'},
                                key_list=key_list)
        self.qyxx_spot_check.extend(info)

def pledge_execute(self, **kwargs):
    hyperlinks = kwargs.get('hyperlink')
    keyword = ['股权出资']
    url = get_url(hyperlinks, self.query, keyword)
    params = kwargs.get('params')
    if url:
        response = url_requests.get(url=url, params=params,
                                    headers=self.headers, proxies=proxies)
        try:
            jsn = json.loads(response.content)
            items = jsn[0]['list']
        except Exception:
            print response.content
            raise
        for each_item in items:
            item = {}
            item['reg_code'] = each_item.get('equityno', '')
            item['pleder'] = each_item.get('pledgor', '')
            item['id_card'] = each_item.get('pledblicno', '')
            item['plede_amount'] = each_item.get('impam', '')
            item['brower'] = each_item.get('imporg', '')
            item['brower_id_card'] = each_item.get('imporgblicno', '')
            item['reg_date'] = each_item.get('equpledate', '')
            item['status'] = each_item.get('type_text', '')
            item['changes'] = '详情'
            self.qyxx_pledge.append(item)

def spot_check_execute(self):
    url = ('http://%s/aiccips//cipSpotCheInfo/cipSpotCheInfoList'
           % self.host)
    response = url_requests.get(url=url, params=self.data,
                                headers=self.headers, proxies=proxies)
    items = json.loads(response.text)['list']['list']  # a list of rows
    if items:
        for each_item in items:
            item = {}
            item['authority'] = each_item.get('aicName', '')
            item['spot_type'] = each_item.get('typeStr', '')
            item['spot_date'] = each_item.get('insDate', '')
            if item['spot_date']:
                # Convert the millisecond timestamp to a date string.
                date = time.localtime(item['spot_date'] / 1000)
                item['spot_date'] = time.strftime("%Y-%m-%d", date)
            # inspectDetail is a numeric code; map it to the official
            # result wording.
            item['spot_result'] = (each_item.get(
                'inspectDetail', '').replace('1', '正常').replace(
                    '2', '未按规定公示年报').replace(
                    '3', '未按规定公示其他应当公示的信息').replace(
                    '4', '公示信息隐瞒真实情况、弄虚作假').replace(
                    '5', '通过登记的住所(经营场所)无法联系').replace(
                    '6', '不予配合情节严重').replace(
                    '7', '该主体已注销').replace(
                    '8', '该主体未建财务账').replace('9', '其他'))
            self.qyxx_spot_check.append(item)

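# The millisecond-timestamp conversion above recurs in several parsers
# (mortgage_basic_execute, pledge_execute, stock_freeze_execute); a small
# helper like this could replace the repeated inline code. The name
# ms_to_date is an assumption, not from the original source; it reuses the
# module's existing `time` import.
def ms_to_date(ms_timestamp):
    """Convert a millisecond epoch timestamp to a 'YYYY-MM-DD' string."""
    return time.strftime("%Y-%m-%d", time.localtime(ms_timestamp / 1000))

# e.g. ms_to_date(1483924925344) -> '2017-01-09' (in CST, UTC+8)
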
def black_info_execute(self, **kwargs):
    hyperlinks = kwargs.get('hyperlink')
    keyword = ['严重违法']
    url = get_url(hyperlinks, self.query, keyword)
    params = kwargs.get('params')
    if url:
        response = url_requests.get(url=url, params=params,
                                    headers=self.headers, proxies=proxies)
        try:
            jsn = json.loads(response.content)
            items = jsn[0]['list']
        except Exception:
            print response.content
            raise
        for each_item in items:
            item = {}
            item['reason_in'] = each_item.get('serillrea_cn', '')
            item['date_in'] = each_item.get('abntime', '')
            item['authority_in'] = each_item.get('decorg_cn', '')
            item['reason_out'] = each_item.get('remexcpres_cn', '')
            item['date_out'] = each_item.get('remdate', '')
            item['authority_out'] = each_item.get('recorg_cn', '')
            # Keep a single 'authority' field: prefer the removal
            # authority when present, else the listing authority.
            if item['authority_out'] == '' and item['authority_in']:
                item['authority'] = item.pop('authority_in')
                item.pop('authority_out')
            else:
                item['authority'] = item.pop('authority_out')
                item.pop('authority_in')
            self.qyxx_black_info.append(item)

def get_register(self):
    '''Fields that may be mutated here:
    self.cookies
    self.challenge (32 characters)
    self.gt
    '''
    headers = {
        'Cookie': '',  # self.cookies
        'Host': 'gx.gsxt.gov.cn',
        'Referer': ('http://sc.gsxt.gov.cn/ztxy.do?method=index'
                    '&random=1483924925344'),
        # The random value could instead be refreshed per request with
        # str(int(time.time() * 1000)).
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(self.url_start_captcha, headers=headers,
                            proxies=proxies)
    print 11, resp.cookies
    part_data = resp.json()
    if part_data['success'] == 1:
        self.challenge = part_data['challenge']
        self.gt = part_data['gt']
    resp.close()
    print 'self.challenge[30:]', self.challenge[30:]

def get_data(urls, keyword):
    '''Fetch one section's data.

    :param urls: list of section URL suffixes scraped from the source of
        the company information page.
    :param keyword: str, keyword identifying the wanted section.
    :return: list holding the section's data.
    '''
    result = []
    url = None
    for each in urls:
        if keyword in each:
            url = 'http://www.gsxt.gov.cn' + each.split('"')[1].strip()
            break
    if url:
        headers = get_headers()
        response = url_requests.get(url=url, headers=headers,
                                    proxies=proxies)
        json_data = json.loads(response.content)
        all_data = json_data.get('data')  # first page of data
        total_page = json_data.get('totalPage')
        if total_page > 1:
            for page in range(1, total_page):
                form_data = {'draw': 1, 'start': page * 5, 'length': 5}
                headers = get_headers()
                response = url_requests.post(url=url, data=form_data,
                                             headers=headers,
                                             proxies=proxies)
                json_data = json.loads(response.content)
                data = json_data.get('data')
                all_data.extend(data)  # merge the paged data
        result = all_data
    return result

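# A usage sketch for get_data. The keyword 'branchUrlAll' appears
# elsewhere in this module; extract_section_urls is a hypothetical helper
# standing in for however the URL suffixes are scraped from the page.
urls = extract_section_urls(page_source)  # hypothetical
branch_rows = get_data(urls, 'branchUrlAll')
for row in branch_rows:
    print row.get('brName', '')
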
def b_c_execute(self, page_soup):
    url = ('http://gx.gsxt.gov.cn/gjjbj/'
           'gjjQueryCreditAction!xj_biangengFrame.dhtml')
    self.params["regno"] = self.regno
    self.params["urlflag"] = '5'
    self.params["ent_id"] = self.params["entId"]
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    key_list = [
        'xuhao', 'reason', 'before_change', 'after_change',
        'date_to_change'
    ]
    soup = page_soup.find('div', {'class': 'qyqx-detail'})
    if soup:
        info = CreditInfo.parse(soup, 'table', {'class': 'table-result'},
                                key_list=key_list)
        self.qyxx_b_c.extend(info)

def get_checkCode(cookies):
    """Fetch the captcha using the home page's cookies and return the
    recognized string.

    :param cookies: cookies produced when visiting the home page
    """
    url = 'http://gsxt.ynaic.gov.cn/notice/captcha?preset=0'
    headers = {
        'Host': 'gsxt.ynaic.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.71 Safari/537.36')
    }
    response = url_requests.get(url, headers=headers, cookies=cookies,
                                timeout=10)
    f = BytesIO(response.content)
    image = Image.open(f)
    checkCode = image_recognition(image, 'yunnan',
                                  config='-psm 7 character')
    return checkCode

def mortgage_execute(self, **kwargs):
    mort_id = kwargs.get('mort_id')
    url = ('http://cq.gsxt.gov.cn/gsxt/api/mortguarantee/queryList'
           '/%s' % mort_id)
    params = kwargs.get('params')
    response = url_requests.get(url=url, params=params,
                                headers=self.headers, proxies=proxies)
    try:
        jsn = json.loads(response.content)
        items = jsn[0]['list']
    except Exception:
        print response.content
        raise
    item_list = []
    for each_item in items:
        item = {}
        item['mortgage_reg_num'] = self.mortgage_reg_num
        item['mortgage_name'] = each_item.get('guaname', '')
        item['belongs'] = each_item.get('own', '')
        item['information'] = each_item.get('guades', '')
        item['mortgage_range'] = each_item.get('remark', '')
        item_list.append(item)
    self.qyxx_mortgage.extend(item_list)

def s_creditor_execute(self, **kwargs):
    mort_id = kwargs.get('mort_id')
    url = ('http://cq.gsxt.gov.cn/gsxt/api/mortprincipalclaim/dcdydb'
           '/%s' % mort_id)
    params = kwargs.get('params')
    response = url_requests.get(url=url, params=params,
                                headers=self.headers, proxies=proxies)
    try:
        jsn = json.loads(response.content)
        info = jsn[0]['form']
    except Exception:
        print response.content
        raise
    item = {}
    item['mortgage_reg_num'] = self.mortgage_reg_num
    item['mortgage_type'] = info.get('priclaseckind_cn', '')
    item['amount'] = info.get('priclasecam', '') + '万元'
    item['mortgage_range'] = info.get('warcov', '')
    item['time_range'] = (info.get('pefperform', '') + '至'
                          + info.get('pefperto', ''))
    self.qyxx_s_creditor.append(item)

def c_mortgage_execute(self, **kwargs):
    mort_id = kwargs.get('mort_id')
    url = ('http://cq.gsxt.gov.cn/gsxt/api/mortreginfo/dcdy'
           '/%s' % mort_id)
    params = kwargs.get('params')
    response = url_requests.get(url=url, params=params,
                                headers=self.headers, proxies=proxies)
    try:
        jsn = json.loads(response.content)
        info = jsn[0]['form']
    except Exception:
        print response.content
        raise
    item = {}
    item['mortgage_reg_num'] = info.get('morregcno', '')
    # Remember the registration number for the follow-up detail parsers.
    self.mortgage_reg_num = info.get('morregcno', '')
    item['date_reg'] = info.get('regidate', '')
    item['authority'] = info.get('regorg_cn', '')
    item['mortgage_type'] = ''
    item['amount'] = ''
    item['time_range'] = ''
    item['mortgage_range'] = ''
    self.qyxx_c_mortgage.append(item)

def stock_freeze_execute(self, **kwargs):
    hyperlinks = kwargs.get('hyperlink')
    keyword = ['司法协助']
    url = get_url(hyperlinks, self.query, keyword)
    params = kwargs.get('params')
    if url:
        response = url_requests.get(url=url, params=params,
                                    headers=self.headers, proxies=proxies)
        try:
            jsn = json.loads(response.content)
            items = jsn[0]['list']
        except Exception:
            print response.content
            raise
        for each_item in items:
            item = {}
            item['person'] = each_item.get('inv', '')
            item['stock'] = each_item.get('infroam',
                                          each_item.get('froam', ''))
            item['court'] = each_item.get('froauth', '')
            item['notice_number'] = each_item.get('executeno', '')
            item['status'] = each_item.get('frozstate_cn', '')
            self.qyxx_stock_freeze.append(item)

def spot_check_execute(self, **kwargs):
    hyperlinks = kwargs.get('hyperlink')
    keyword = ['抽查检查']
    url = get_url(hyperlinks, self.query, keyword)
    params = kwargs.get('params')
    if url:
        response = url_requests.get(url=url, params=params,
                                    headers=self.headers, proxies=proxies)
        try:
            jsn = json.loads(response.content)
            items = jsn[0]['list']
        except Exception:
            print response.content
            raise
        for each_item in items:
            item = {}
            item['authority'] = each_item.get('insauth_cn', '')
            item['spot_type'] = each_item.get('instype', '')
            item['spot_date'] = each_item.get('insdate', '')
            item['spot_result'] = each_item.get('insres_cn', '')
            self.qyxx_spot_check.append(item)

def branch_execute(self, page_soup):
    url = ('http://gx.gsxt.gov.cn/gjjbj/'
           'gjjQueryCreditAction!fzjgFrame.dhtml')
    self.params["regno"] = self.regno
    self.params["urltag"] = '4'
    self.params["ent_id"] = self.params["entId"]
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    soup = page_soup.find('div', {'class': 'qyqx-detail'})
    if soup:
        tables = soup.find_all('table')
        for each in tables:
            info = {}
            # Cells hold company_name, company_num, authority in order.
            cells = each.find_all('td')
            info['company_name'] = cells[0].text.strip()
            info['company_num'] = (cells[1].text.split(':')[1].strip()
                                   if len(cells) > 1 else '')
            info['authority'] = (cells[2].text.split(':')[1].strip()
                                 if len(cells) > 2 else '')
            self.qyxx_branch.append(info)

def get_cookies():
    """Fetch the home page's cookies.

    :return: dict of cookies
    """
    url = 'http://www.jsgsj.gov.cn:58888/province/'
    headers = {
        'Host': 'www.jsgsj.gov.cn:58888',
        'X-Forwarded-For': '8.8.8.8',
        'Referer': 'http://gsxt.saic.gov.cn/',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/44.0.2403.155 Safari/537.36')
    }
    response = url_requests.get(url=url, headers=headers, proxies=proxies,
                                timeout=20)
    cookies = dict(response.cookies)
    return cookies

def get_home(self):
    '''Fields that may be mutated here:
    self.cookies
    '''
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url=self.url_home, headers=headers,
                            proxies=proxies)
    print "00:", resp.cookies
    # Splice the session cookie out of the jar's string representation;
    # fragile, but mirrors the server's two-cookie layout.
    if len(str(resp.cookies).split(" ")) > 5:
        print 'get'
        self.cookies = (str(resp.cookies).split(" ")[5] + ";"
                        + str(resp.cookies).split(" ")[1])
    elif 5 > len(str(resp.cookies).split(" ")) > 2:
        print 'get2'
        self.cookies = (self.cookies.split(";")[0] + ";"
                        + str(resp.cookies).split(" ")[1])
    resp.close()

def mortgage_basic_execute(self):
    url = 'http://%s/aiccips//PleInfo/PleInfoList' % self.host
    response = url_requests.get(url=url, params=self.data,
                                headers=self.headers, proxies=proxies)
    items = json.loads(response.text)['list']['list']  # a list of rows
    if items:
        for each_item in items:
            item = {}
            item['mortgage_reg_num'] = each_item.get('pleNo', '')
            item['date_reg'] = each_item.get('regiDate', '')
            if item['date_reg']:
                # Convert the millisecond timestamp to a date string.
                date = time.localtime(item['date_reg'] / 1000)
                item['date_reg'] = time.strftime("%Y-%m-%d", date)
            item['authority'] = each_item.get('regOrgStr', '')
            item['amount'] = str(each_item.get('priClaSecAm', '')) + '万元'
            if '1' in each_item.get('type', ''):
                item['status'] = '有效'
            else:
                item['status'] = '无效'
            item['gongshiriqi'] = each_item.get('pefPerForm', '')
            if item['gongshiriqi']:
                # Convert the millisecond timestamp to a date string.
                date = time.localtime(item['gongshiriqi'] / 1000)
                item['gongshiriqi'] = time.strftime("%Y-%m-%d", date)
            # Keep the raw row; detail parsers read further fields
            # from it later.
            item['detail'] = each_item
            self.qyxx_mortgage_basic.append(item)

def mortgage_basic_execute(self, page_soup):
    url = ('http://gx.gsxt.gov.cn/gjjbjTab/'
           'gjjTabQueryCreditAction!dcdyFrame.dhtml')
    self.params["urltag"] = '12'
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    soup = page_soup.find('div', {'class': 'qyqx-detail'})
    if soup:
        key_list = [
            'xuhao', 'mortgage_reg_num', 'date_reg', 'authority',
            'amount', 'status', 'gsrq', 'xiangqing'
        ]
        info = CreditInfo.parse(soup, 'table', {'class': 'table-result'},
                                key_list=key_list)
        self.qyxx_mortgage_basic.extend(info)

def adm_punishment_execute(self, **kwargs):
    hyperlinks = kwargs.get('hyperlink')
    keyword = ['行政处罚']
    url = get_url(hyperlinks, self.query, keyword)
    params = kwargs.get('params')
    if url:
        response = url_requests.get(url=url, params=params,
                                    headers=self.headers, proxies=proxies)
        try:
            jsn = json.loads(response.content)
            items = jsn[0]['list']
        except Exception:
            print response.content
            raise
        for each_item in items:
            item = {}
            item['pun_number'] = each_item.get('pendecno', '')
            item['reason'] = each_item.get('illegacttype', '')
            item['fines'] = each_item.get('pencontent', '')
            item['authority'] = each_item.get('penauth_cn', '')
            item['pun_date'] = each_item.get('pendecissdate', '')
            item['gongshiriqi'] = each_item.get('publicdate', '')
            self.qyxx_adm_punishment.append(item)

def get_checkCode():
    """Fetch the captcha with the session cookies and recognize it.

    :return: string
    """
    captcha_url = 'http://www.jsgsj.gov.cn:58888/province/rand_img.jsp'
    headers = {
        'X-Forwarded-For': '8.8.8.8',
        'Host': 'www.jsgsj.gov.cn:58888',
        'Referer': 'http://www.jsgsj.gov.cn:58888/province/',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/44.0.2403.155 Safari/537.36')
    }
    html = url_requests.get(url=captcha_url, headers=headers,
                            cookies=cookies, proxies=proxies,
                            timeout=20).content
    buf = BytesIO(html)  # avoid shadowing the Python 2 builtin `file`
    im = Image.open(buf)
    checkCode = image_to_string(im)
    return checkCode

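# A hypothetical call sequence: get_checkCode reads the module-level
# `cookies`, so it must be populated first. This wiring is an assumption
# about how the module fits together, not taken from the original source.
cookies = get_cookies()
checkCode = get_checkCode()
print 'recognized captcha:', checkCode
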
def pledge_execute(self, page_soup):
    url = 'http://gx.gsxt.gov.cn/gdczdj/gdczdjAction!gdczdjFrame.dhtml'
    self.params["urltag"] = '13'
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    key_list = [
        'xuhao', 'reg_code', 'pleder', 'id_card', 'plede_amount',
        'brower', 'brower_id_card', 'reg_date', 'status', 'gongshiriqi',
        'changes'
    ]
    soup = page_soup.find('div', {'class': 'qyqx-detail'})
    if soup:
        info = CreditInfo.parse(soup, 'table', {'class': 'table-result'},
                                key_list=key_list)
        self.qyxx_pledge.extend(info)

def pledge_execute(self):
    url = 'http://%s/aiccips//StoPleInfo/StoPleInfoList' % self.host
    response = url_requests.get(url=url, params=self.data,
                                headers=self.headers, proxies=proxies)
    items = json.loads(response.text)['list']['list']  # a list of rows
    if items:
        for each_item in items:
            item = {}
            item['reg_code'] = each_item.get('stoRegNo', '').strip()
            item['pleder'] = each_item.get('inv', '')
            item['id_card'] = each_item.get('invID', '')
            item['plede_amount'] = str(each_item.get('impAm', '')) + '万元'
            item['brower'] = each_item.get('impOrg', '')
            item['brower_id_card'] = each_item.get('impOrgID', '')
            item['reg_date'] = each_item.get('registDate', '')
            if item['reg_date']:
                # Convert the millisecond timestamp to a date string.
                date = time.localtime(item['reg_date'] / 1000)
                item['reg_date'] = time.strftime("%Y-%m-%d", date)
            if '1' in each_item.get('type', ''):
                item['status'] = '有效'
            else:
                item['status'] = '无效'
            item['changes'] = '详情'
            self.qyxx_pledge.append(item)

def adm_punishment_execute(self, page_soup):
    url = 'http://gx.gsxt.gov.cn//gdgq/gdgqAction!xj_qyxzcfFrame.dhtml'
    self.params["urltag"] = '14'
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    key_list = [
        'xuhao', 'pun_number', 'reason', 'fines', 'authority',
        'pun_date', 'gongshiriqi', 'detail'
    ]
    soup = page_soup.find('div', {'class': 'qyqx-detail'})
    if soup:
        info = CreditInfo.parse(soup, 'table', {'class': 'table-result'},
                                key_list=key_list)
        self.qyxx_adm_punishment.extend(info)

def stock_freeze_execute(self):
    url = ('http://%s/aiccips//judiciaryAssist/judiciaryAssistList'
           % self.host)
    response = url_requests.get(url=url, params=self.data,
                                headers=self.headers, proxies=proxies)
    items = json.loads(response.text)['list']['list']  # a list of rows
    if items:
        for each_item in items:
            item = {}
            item['person'] = each_item.get('inv', '')
            item['stock'] = str(each_item.get('froAm', '')) + '万元人民币'
            item['court'] = each_item.get('froAuth', '')
            item['notice_number'] = each_item.get('froDocNo', '')
            item['status'] = each_item.get('frozState', '').strip()
            # frozState codes: 1-4 frozen, 5 unfrozen, anything else
            # treated as expired.
            if item['status'] in ['1', '2', '3', '4']:
                item['status'] = '冻结'
            elif item['status'] == '5':
                item['status'] = '解除冻结'
            else:
                item['status'] = '失效'
            self.qyxx_stock_freeze.append(item)

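# An equivalent, arguably clearer form of the frozState mapping above;
# a sketch only, with the code-to-label table taken directly from the
# branches it would replace.
FROZ_STATE_LABELS = {'1': '冻结', '2': '冻结', '3': '冻结', '4': '冻结',
                     '5': '解除冻结'}
# item['status'] = FROZ_STATE_LABELS.get(item['status'], '失效')
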
def abnormal_execute(self, page_soup):
    url = 'http://gx.gsxt.gov.cn/gsgs/gsxzcfAction!list_jyycxx.dhtml'
    self.params["regno"] = self.regno
    self.params["urlflag"] = '8'
    headers = {
        'Host': 'gx.gsxt.gov.cn',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/54.0.2840.99 Safari/537.36')
    }
    resp = url_requests.get(url, headers=headers, params=self.params,
                            proxies=proxies)
    page_soup = BeautifulSoup(resp.content, 'lxml')
    key_list = [
        'xuhao', 'reason', 'date_occurred', 'authority_occurred',
        'reason_out', 'date_out', 'authority'
    ]
    info = CreditInfo.parse(page_soup, 'table',
                            {'class': 'table-result'}, key_list=key_list)
    self.qyxx_abnormal.extend(info)