def __init__(self, info=None, parser=None, *args, **kwargs):
    """Set up proxy settings, the parser, a file lock and the HTTP session.

    Args:
        info: Stored on ``self.info`` and passed to ``MyParser``.
        parser: Accepted but not read here; a new ``MyParser`` is always
            created.
        *args: Not used by this method.
        **kwargs: Not used by this method.
    """
    # The proxy switch is queried with the upper-case province name, while
    # the proxy pool is queried with the lower-case one.
    if UseProxy().get_province_is_use_proxy(province='BEIJING'):
        pool = Proxy()
        # Each scheme gets its own fetch and its own random pick.
        http_url = 'http://' + random.choice(
            pool.get_proxy(num=5, province='beijing'))
        https_url = 'https://' + random.choice(
            pool.get_proxy(num=5, province='beijing'))
        self.proxies = {'http': http_url, 'https': https_url}
    else:
        # Proxying disabled: an empty list is stored (not an empty dict).
        self.proxies = []
    print('self.proxies: %s' % (self.proxies,))

    self.info = info
    self.parser = MyParser(info=self.info)
    self.write_file_mutex = threading.Lock()

    # Browser-like default headers sent with every request of the session.
    default_headers = {
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US, en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:39.0) Gecko/20100101 Firefox/39.0',
    }
    self.reqst = requests.Session()
    self.reqst.headers.update(default_headers)
def __init__(self, info=None, parser=None, *args, **kwargs):
    """Configure proxies, parser, write lock and HTTP session.

    Args:
        info: Kept on ``self.info`` and forwarded to ``MyParser``.
        parser: Accepted but never read; ``self.parser`` is always a fresh
            ``MyParser``.
        *args: Not used by this method.
        **kwargs: Not used by this method.
    """
    province = 'shanghai'
    proxy_switch = UseProxy()
    if not proxy_switch.get_province_is_use_proxy(province=province):
        # No proxying for this province: keep the empty-list marker.
        selected = []
    else:
        source = Proxy()
        # Two separate lookups, one random pick per scheme.
        http_url = 'http://' + random.choice(
            source.get_proxy(num=5, province=province))
        https_url = 'https://' + random.choice(
            source.get_proxy(num=5, province=province))
        selected = {'http': http_url, 'https': https_url}
    self.proxies = selected
    print('self.proxies: %s' % (self.proxies,))

    self.info = info
    self.parser = MyParser(info=self.info)
    self.write_file_mutex = threading.Lock()

    session = requests.Session()
    self.reqst = session
    # Default headers applied to every request made through the session.
    session.headers.update({
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:46.0) Gecko/20100101 Firefox/46.0',
    })
def get_proxy(province=''):
    """Return a proxies mapping, keyed by URL scheme, for *province*.

    Args:
        province: Province name passed to the proxy switch and to the proxy
            pool. Compared case-insensitively against ``'shanghai'``.

    Returns:
        dict: ``{}`` when proxying is disabled for the province. Otherwise a
        single-entry mapping: ``{'https': 'https://<addr>'}`` for Shanghai
        and ``{'http': 'http://<addr>'}`` for every other province.
    """
    # Guard clause: nothing below runs (and no Proxy is created) when the
    # province is configured not to use a proxy.
    if not UseProxy().get_province_is_use_proxy(province):
        return {}

    # Previously the 'http' entry was always fetched and then thrown away for
    # Shanghai. Decide the scheme first so the pool is queried only once.
    # NOTE(review): Shanghai still gets only the 'https' entry, as before --
    # confirm the 'http' entry was not meant to be kept alongside it.
    scheme = 'https' if province.lower() == 'shanghai' else 'http'
    candidates = Proxy().get_proxy(num=5, province=province)
    return {scheme: scheme + '://' + random.choice(candidates)}
def __init__(self, info=None, *args, **kwargs):
    """Configure the proxy settings and the HTTP session.

    Args:
        info: Not read by this method.
        *args: Not used by this method.
        **kwargs: Not used by this method.
    """
    # Query the proxy interface and the switch that says whether a proxy
    # should be used, then enable or disable proxying accordingly.
    use_proxy = UseProxy().get_province_is_use_proxy(province='jiangsu')
    if use_proxy:
        fetcher = Proxy()
        chosen = {}
        # One lookup and one random pick per scheme, http first.
        for scheme in ('http', 'https'):
            address = random.choice(
                fetcher.get_proxy(num=5, province='jiangsu'))
            chosen[scheme] = scheme + '://' + address
        self.proxies = chosen
    else:
        # Proxying disabled: an empty list is stored.
        self.proxies = []
    print('self.proxies: %s' % (self.proxies,))

    self.reqst = requests.Session()
    # Browser-like default headers for every request of this session.
    browser_headers = {
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US, en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:39.0) Gecko/20100101 Firefox/39.0',
    }
    self.reqst.headers.update(browser_headers)
def __init__(self, json_restore_path):
    """Initialise session, captcha helper, proxies and section lookup tables.

    Args:
        json_restore_path: Stored on ``self.json_restore_path``.

    Raises:
        OSError: If the captcha image directory cannot be created.
    """
    self.id = None
    self.reqst = requests.Session()
    self.json_restore_path = json_restore_path

    # NOTE(review): the captcha path is built from settings.json_restore_path,
    # not from the json_restore_path argument -- confirm this is intended.
    self.ckcode_image_path = settings.json_restore_path + '/yunnan/ckcode.jpg'
    ckcode_dir = os.path.dirname(self.ckcode_image_path)
    if not os.path.exists(ckcode_dir):
        try:
            os.makedirs(ckcode_dir)
        except OSError:
            # Another thread/process may have created the directory between
            # the existence check and makedirs(); only real failures are
            # re-raised.
            if not os.path.isdir(ckcode_dir):
                raise

    self.code_cracker = CaptchaRecognition('yunnan')

    # Browser-like default headers sent with every request of the session.
    self.reqst.headers.update({
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US, en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:39.0) Gecko/20100101 Firefox/39.0',
    })

    # NOTE(review): the proxy province is 'guangxi' although the captcha
    # helper, image path and URLs below all refer to Yunnan -- looks like a
    # copy-paste leftover; confirm before changing.
    if UseProxy().get_province_is_use_proxy(province='guangxi'):
        pool = Proxy()
        # Each scheme gets its own fetch and its own random pick.
        http_url = 'http://' + random.choice(
            pool.get_proxy(num=5, province='guangxi'))
        https_url = 'https://' + random.choice(
            pool.get_proxy(num=5, province='guangxi'))
        self.proxies = {'http': http_url, 'https': https_url}
    else:
        # Proxying disabled: an empty list is stored.
        self.proxies = []
    print('self.proxies: %s' % (self.proxies,))

    # URLs used by the crawler.
    self.mydict = {
        'eareName': 'http://www.ahcredit.gov.cn',
        'search': 'http://gsxt.ynaic.gov.cn/notice/',
        'searchList': 'http://gsxt.ynaic.gov.cn/notice/search/ent_info_list',
        'validateCode': 'http://gsxt.ynaic.gov.cn/notice/captcha?preset=&ra=0.06570781518790503',
    }

    # Section title -> result key, "ind_comm_pub_*" group.
    self.one_dict = {
        u'基本信息': 'ind_comm_pub_reg_basic',
        u'股东信息': 'ind_comm_pub_reg_shareholder',
        u'发起人信息': 'ind_comm_pub_reg_shareholder',
        u'股东(发起人)信息': 'ind_comm_pub_reg_shareholder',
        u'合伙人信息': 'ind_comm_pub_reg_shareholder',
        u'变更信息': 'ind_comm_pub_reg_modify',
        u'主要人员信息': 'ind_comm_pub_arch_key_persons',
        u'分支机构信息': 'ind_comm_pub_arch_branch',
        u'清算信息': 'ind_comm_pub_arch_liquidation',
        u'动产抵押登记信息': 'ind_comm_pub_movable_property_reg',
        u'股权出置登记信息': 'ind_comm_pub_equity_ownership_reg',
        u'股权出质登记信息': 'ind_comm_pub_equity_ownership_reg',
        u'行政处罚信息': 'ind_comm_pub_administration_sanction',
        u'经营异常信息': 'ind_comm_pub_business_exception',
        u'严重违法信息': 'ind_comm_pub_serious_violate_law',
        u'抽查检查信息': 'ind_comm_pub_spot_check',
    }

    # Section title -> result key, "ent_pub_*" group.
    self.two_dict = {
        u'企业年报': 'ent_pub_ent_annual_report',
        u'企业投资人出资比例': 'ent_pub_shareholder_capital_contribution',
        u'股东(发起人)及出资信息': 'ent_pub_shareholder_capital_contribution',
        u'股东及出资信息(币种与注册资本一致)': 'ent_pub_shareholder_capital_contribution',
        u'股权变更信息': 'ent_pub_equity_change',
        u'行政许可信息': 'ent_pub_administration_license',
        u'知识产权出资登记': 'ent_pub_knowledge_property',
        u'知识产权出质登记信息': 'ent_pub_knowledge_property',
        u'行政处罚信息': 'ent_pub_administration_sanction',
        u'变更信息': 'ent_pub_shareholder_modify',
    }

    # Section title -> result key, "other_dept_pub_*" group.
    self.three_dict = {
        u'行政许可信息': 'other_dept_pub_administration_license',
        u'行政处罚信息': 'other_dept_pub_administration_sanction',
    }

    # Section title -> result key, "judical_assist_pub_*" group.
    self.four_dict = {
        u'股权冻结信息': 'judical_assist_pub_equity_freeze',
        u'司法股权冻结信息': 'judical_assist_pub_equity_freeze',
        u'股东变更信息': 'judical_assist_pub_shareholder_modify',
        u'司法股东变更登记信息': 'judical_assist_pub_shareholder_modify',
    }

    # Assigned once; the earlier duplicate assignment was redundant.
    self.result_json_dict = {}
def test_api(self):
    """Print what ``Proxy.get_proxy`` returns by default and with ``is_valid=False``.

    This is a smoke test: it makes no assertions and only prints the results.
    """
    client = Proxy()
    default_result = client.get_proxy()
    print(default_result)
    with_invalid_flag = client.get_proxy(is_valid=False)
    print(with_invalid_flag)