Exemplo n.º 1
0
    def __init__(self, info=None, parser=None, *args, **kwargs):

        useproxy = UseProxy()
        is_use_proxy = useproxy.get_province_is_use_proxy(province='BEIJING')
        if not is_use_proxy:
            self.proxies = []
        else:
            proxy = Proxy()
            self.proxies = {
                'http':
                'http://' +
                random.choice(proxy.get_proxy(num=5, province='beijing')),
                'https':
                'https://' +
                random.choice(proxy.get_proxy(num=5, province='beijing'))
            }
        print 'self.proxies:', self.proxies
        # self.proxies = []

        self.info = info
        self.parser = MyParser(info=self.info)
        self.write_file_mutex = threading.Lock()
        self.reqst = requests.Session()
        self.reqst.headers.update({
            'Accept':
            'text/html, application/xhtml+xml, */*',
            'Accept-Encoding':
            'gzip, deflate',
            'Accept-Language':
            'en-US, en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:39.0) Gecko/20100101 Firefox/39.0'
        })
Exemplo n.º 2
0
    def __init__(self, info=None, parser=None, *args, **kwargs):
        useproxy = UseProxy()
        is_use_proxy = useproxy.get_province_is_use_proxy(province='shanghai')
        if not is_use_proxy:
            self.proxies = []
        else:
            proxy = Proxy()
            self.proxies = {
                'http':
                'http://' +
                random.choice(proxy.get_proxy(num=5, province='shanghai')),
                'https':
                'https://' +
                random.choice(proxy.get_proxy(num=5, province='shanghai'))
            }
        print 'self.proxies:', self.proxies

        # self.proxies = []

        self.info = info
        self.parser = MyParser(info=self.info)
        self.write_file_mutex = threading.Lock()
        self.reqst = requests.Session()
        self.reqst.headers.update({
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Encoding':
            'gzip, deflate, br',
            'Accept-Language':
            'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:46.0) Gecko/20100101 Firefox/46.0'
        })
Exemplo n.º 3
0
def get_proxy(province=''):
    """Build the ``proxies`` mapping for a province.

    ``UseProxy`` is asked whether the province goes through a proxy. If it
    does not, an empty dict is returned. Otherwise one address is picked at
    random from ``Proxy.get_proxy(num=5, province=province)`` and returned
    under a single scheme key: ``'https'`` when ``province`` equals
    'shanghai' (compared case-insensitively), ``'http'`` for anything else.

    Args:
        province: Province name forwarded to the proxy-configuration lookup
            and to the proxy pool query.

    Returns:
        dict: ``{}``, ``{'http': 'http://<addr>'}`` or
        ``{'https': 'https://<addr>'}``.
    """
    if not UseProxy().get_province_is_use_proxy(province):
        return {}

    # Pick the scheme before touching the pool: the previous code always
    # fetched an http proxy first and then discarded it for Shanghai, costing
    # an extra pool query whose result was never used.
    scheme = 'https' if province.lower() == 'shanghai' else 'http'
    candidates = Proxy().get_proxy(num=5, province=province)
    return {scheme: scheme + '://' + random.choice(candidates)}
Exemplo n.º 4
0
 def __init__(self, info=None, *args, **kwargs):
     # 调用代理,及配置是否使用代理的接口。完成使用代理或者不使用代理。
     useproxy = UseProxy()
     is_use_proxy = useproxy.get_province_is_use_proxy(province='jiangsu')
     if not is_use_proxy:
         self.proxies = []
     else:
         proxy = Proxy()
         self.proxies = {
             'http': 'http://' + random.choice(proxy.get_proxy(num=5, province='jiangsu')),
             'https': 'https://' + random.choice(proxy.get_proxy(num=5, province='jiangsu'))
         }
     print 'self.proxies:', self.proxies
     # self.proxies = []
     self.reqst = requests.Session()
     self.reqst.headers.update({
         'Accept': 'text/html, application/xhtml+xml, */*',
         'Accept-Encoding': 'gzip, deflate',
         'Accept-Language':
         'en-US, en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
         'User-Agent':
         'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:39.0) Gecko/20100101 Firefox/39.0'
     })
Exemplo n.º 5
0
    def __init__(self, json_restore_path):
        self.id = None
        self.reqst = requests.Session()
        self.json_restore_path = json_restore_path
        self.ckcode_image_path = settings.json_restore_path + '/yunnan/ckcode.jpg'
        if not os.path.exists(os.path.dirname(self.ckcode_image_path)):
            os.makedirs(os.path.dirname(self.ckcode_image_path))
        self.result_json_dict = {}
        self.code_cracker = CaptchaRecognition('yunnan')
        self.reqst.headers.update({
            'Accept':
            'text/html, application/xhtml+xml, */*',
            'Accept-Encoding':
            'gzip, deflate',
            'Accept-Language':
            'en-US, en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:39.0) Gecko/20100101 Firefox/39.0'
        })

        useproxy = UseProxy()
        is_use_proxy = useproxy.get_province_is_use_proxy(province='guangxi')
        if not is_use_proxy:
            self.proxies = []
        else:
            proxy = Proxy()
            self.proxies = {
                'http':
                'http://' +
                random.choice(proxy.get_proxy(num=5, province='guangxi')),
                'https':
                'https://' +
                random.choice(proxy.get_proxy(num=5, province='guangxi'))
            }
        print 'self.proxies:', self.proxies
        # self.proxies = []

        self.mydict = {
            'eareName':
            'http://www.ahcredit.gov.cn',
            'search':
            'http://gsxt.ynaic.gov.cn/notice/',
            'searchList':
            'http://gsxt.ynaic.gov.cn/notice/search/ent_info_list',
            'validateCode':
            'http://gsxt.ynaic.gov.cn/notice/captcha?preset=&ra=0.06570781518790503'
        }

        self.one_dict = {
            u'基本信息': 'ind_comm_pub_reg_basic',
            u'股东信息': 'ind_comm_pub_reg_shareholder',
            u'发起人信息': 'ind_comm_pub_reg_shareholder',
            u'股东(发起人)信息': 'ind_comm_pub_reg_shareholder',
            u'合伙人信息': 'ind_comm_pub_reg_shareholder',
            u'变更信息': 'ind_comm_pub_reg_modify',
            u'主要人员信息': 'ind_comm_pub_arch_key_persons',
            u'分支机构信息': 'ind_comm_pub_arch_branch',
            u'清算信息': 'ind_comm_pub_arch_liquidation',
            u'动产抵押登记信息': 'ind_comm_pub_movable_property_reg',
            u'股权出置登记信息': 'ind_comm_pub_equity_ownership_reg',
            u'股权出质登记信息': 'ind_comm_pub_equity_ownership_reg',
            u'行政处罚信息': 'ind_comm_pub_administration_sanction',
            u'经营异常信息': 'ind_comm_pub_business_exception',
            u'严重违法信息': 'ind_comm_pub_serious_violate_law',
            u'抽查检查信息': 'ind_comm_pub_spot_check'
        }

        self.two_dict = {
            u'企业年报': 'ent_pub_ent_annual_report',
            u'企业投资人出资比例': 'ent_pub_shareholder_capital_contribution',
            u'股东(发起人)及出资信息': 'ent_pub_shareholder_capital_contribution',
            u'股东及出资信息(币种与注册资本一致)': 'ent_pub_shareholder_capital_contribution',
            u'股权变更信息': 'ent_pub_equity_change',
            u'行政许可信息': 'ent_pub_administration_license',
            u'知识产权出资登记': 'ent_pub_knowledge_property',
            u'知识产权出质登记信息': 'ent_pub_knowledge_property',
            u'行政处罚信息': 'ent_pub_administration_sanction',
            u'变更信息': 'ent_pub_shareholder_modify'
        }
        self.three_dict = {
            u'行政许可信息': 'other_dept_pub_administration_license',
            u'行政处罚信息': 'other_dept_pub_administration_sanction'
        }
        self.four_dict = {
            u'股权冻结信息': 'judical_assist_pub_equity_freeze',
            u'司法股权冻结信息': 'judical_assist_pub_equity_freeze',
            u'股东变更信息': 'judical_assist_pub_shareholder_modify',
            u'司法股东变更登记信息': 'judical_assist_pub_shareholder_modify'
        }
        self.result_json_dict = {}
Exemplo n.º 6
0
 def test_api(self):
     proxy = Proxy()
     print proxy.get_proxy()
     print proxy.get_proxy(is_valid=False)