Exemplo n.º 1
0
def get_geeTest(geeTest_params):
    """Submit the GeeTest answer and return the validation parameters.

    Sends ``geeTest_params`` as the query string of a GET request to the
    GeeTest ``ajax.php`` endpoint and parses the reply.

    Args:
        geeTest_params: Query parameters for the request. Nothing is sent
            when this is empty or ``None``.

    Returns:
        A dict with the keys ``geetest_validate`` and ``geetest_seccode``
        (the validate token followed by ``'|jordan'``), or ``None`` when
        no parameters were given, the reply's ``message`` is not
        ``'success'``, or the reply carries no ``validate`` token.

    Raises:
        ValueError: If the reply body cannot be decoded as JSON.
    """
    # Nothing to submit: return before building headers or touching the
    # network.
    if not geeTest_params:
        return None

    url = 'http://api.geetest.com/ajax.php'
    headers = {
        'Host': 'api.geetest.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': url_requests.random_userAgent(),
        'Referer': 'http://www.gsxt.gov.cn/index.html'
    }
    response = url_requests.get(url=url,
                                params=geeTest_params,
                                headers=headers,
                                proxies=proxies)

    # The reply is expected to be JSONP, i.e. ``callback({...})``. Take the
    # text between the first '(' and the last ')' so that parentheses inside
    # the payload or trailing characters (';', newline) do not corrupt it.
    # A body without a wrapper is parsed as-is.
    body = response.content
    start = body.find('(')
    end = body.rfind(')')
    if start != -1 and end > start:
        body = body[start + 1:end]
    data = json.loads(body)

    if data.get('message') != 'success':
        return None

    validate = data.get('validate')
    if validate is None:
        # A "success" reply without a token cannot be used for validation.
        return None

    return {
        'geetest_validate': validate,
        'geetest_seccode': validate + '|jordan',
    }
Exemplo n.º 2
0
def get_headers():
    """Build the request headers used for www.gsxt.gov.cn.

    Returns:
        A dict with fixed ``Host`` and ``Referer`` values and a
        ``User-Agent`` supplied by ``url_requests.random_userAgent()``.
    """
    request_headers = {}
    request_headers['Host'] = 'www.gsxt.gov.cn'
    request_headers['Referer'] = (
        'http://www.gsxt.gov.cn/corp-query-search-1.html')
    request_headers['User-Agent'] = url_requests.random_userAgent()
    return request_headers
Exemplo n.º 3
0
def get_result(name, geeTest):
    """Search for a company by name and return the matching entries.

    Posts the search word together with the GeeTest validation parameters
    to the ``corp-query-search-1.html`` endpoint and scrapes the result
    page.

    Args:
        name: Company name to search for.
        geeTest: Dict of GeeTest validation parameters. It is copied, not
            modified. ``None`` (no validated captcha) yields an empty list
            without issuing a request.

    Returns:
        A list of dicts, each with the keys ``company`` (the cleaned-up
        name text) and ``detail`` (absolute link to the detail page).
        The list is empty when the page reports zero results.
    """
    # Without captcha parameters there is nothing valid to post.
    if geeTest is None:
        return []

    url = 'http://www.gsxt.gov.cn/corp-query-search-1.html'
    headers = {
        'Host': 'www.gsxt.gov.cn',
        'Origin': 'http://www.gsxt.gov.cn',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': url_requests.random_userAgent(),
        'Referer': 'http://www.gsxt.gov.cn/index.html'
    }

    # Work on a copy so the caller's dict is not polluted with the search
    # fields and can be reused.
    form = dict(geeTest)
    form.update({
        'searchword': name,
        'tab': 'ent_tab',
        'token': '49092658'  # fake captcha token
    })

    response = url_requests.post(url=url,
                                 data=form,
                                 headers=headers,
                                 proxies=proxies)

    soup = BeautifulSoup(response.content, 'lxml')

    result_count = soup.find('span', class_='search_result_span1')
    if int(result_count.text) == 0:
        # Parenthesised form prints the same text on Python 2 and also
        # parses on Python 3.
        print('未查询到相关企业')
        return []

    result_items = []
    for each_search in soup.find_all('a', class_='search_list_item db'):
        title = each_search.find('h1', class_='f20')
        # Drop line breaks and tabs embedded in the markup, then trim.
        company = title.text.replace('\n', '')\
                            .replace('\r', '')\
                            .replace('\t', '')\
                            .strip()
        result_items.append({
            'company': company,
            'detail': 'http://www.gsxt.gov.cn' + each_search['href'],
        })

    return result_items
Exemplo n.º 4
0
def get_path(gt):
    """Fetch the ``path`` parameter needed for the captcha requests.

    Queries the GeeTest ``gettype.php`` endpoint with ``gt`` and a
    timestamp-based callback name, then reads ``data.path`` from the reply.

    Args:
        gt: The ``gt`` value sent as a query parameter.

    Returns:
        The ``path`` value from the reply, or
        ``'/static/js/geetest.5.7.0.js'`` when the reply has no usable
        ``data``/``path`` entry.

    Raises:
        ValueError: If the reply body cannot be decoded as JSON.
    """
    url = 'http://api.geetest.com/gettype.php'
    headers = {
        'Host': 'api.geetest.com',
        'User-Agent': url_requests.random_userAgent(),
        'Referer': 'http://www.gsxt.gov.cn/index.html'
    }
    params = {'gt': gt, 'callback': 'geetest_' + str(int(time.time() * 1000))}
    response = url_requests.get(url=url,
                                params=params,
                                headers=headers,
                                proxies=proxies)

    # The reply is expected to be JSONP, i.e. ``callback({...})``. Take the
    # text between the first '(' and the last ')' so that parentheses inside
    # the payload or trailing characters (';', newline) do not corrupt it.
    # A body without a wrapper is parsed as-is.
    body = response.content
    start = body.find('(')
    end = body.rfind(')')
    if start != -1 and end > start:
        body = body[start + 1:end]
    res = json.loads(body)

    # ``or {}`` also covers an explicit JSON null for "data".
    data = res.get('data') or {}
    return data.get('path', '/static/js/geetest.5.7.0.js')
Exemplo n.º 5
0
def get_four(gt, challenge, path):
    """Refresh ``gt``/``challenge`` and fetch the captcha image entries.

    Queries the GeeTest ``get.php`` endpoint and collects four fields of
    the reply into a dict. Per the original description, ``bg`` and
    ``fullbg`` are the addresses of the image with the slider piece cut
    out and of the complete image.

    Args:
        gt: The ``gt`` value sent as a query parameter.
        challenge: The ``challenge`` value sent as a query parameter.
        path: The ``path`` value sent as a query parameter.

    Returns:
        A dict with the keys ``gt``, ``challenge``, ``gb`` and ``fullbg``.
        A value is ``None`` when the reply lacks the corresponding field.

    Raises:
        ValueError: If the reply body cannot be decoded as JSON.
    """
    url = 'http://api.geetest.com/get.php'
    headers = {
        'Host': 'api.geetest.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': url_requests.random_userAgent(),
        'Referer': 'http://www.gsxt.gov.cn/index.html'
    }
    params = {
        'gt': gt,
        'challenge': challenge,
        'product': 'popup',
        'offline': 'false',
        'protocol': '',
        'type': 'slide',
        'path': path,
        'callback': 'geetest_%s' % str(int(time.time() * 1000))
    }

    response = url_requests.get(url=url,
                                params=params,
                                headers=headers,
                                proxies=proxies)

    # The reply is expected to be JSONP, i.e. ``callback({...})``. Take the
    # text between the first '(' and the last ')' so that parentheses inside
    # the payload or trailing characters (';', newline) do not corrupt it.
    # A body without a wrapper is parsed as-is.
    body = response.content
    start = body.find('(')
    end = body.rfind(')')
    if start != -1 and end > start:
        body = body[start + 1:end]
    data = json.loads(body)

    return {
        'gt': data.get('gt'),
        'challenge': data.get('challenge'),
        # NOTE(review): the key is spelled 'gb' although it holds the
        # reply's 'bg' field. Kept as-is because callers may read 'gb';
        # confirm before renaming.
        'gb': data.get('bg'),
        'fullbg': data.get('fullbg'),
    }
Exemplo n.º 6
0
def get_gt_challenge():
    """Fetch the ``gt`` and ``challenge`` values used by the slider captcha.

    Requests ``SearchItemCaptcha`` on www.gsxt.gov.cn with a millisecond
    timestamp as the ``v`` parameter and decodes the JSON reply.

    Returns:
        A ``(gt, challenge)`` tuple; an element is ``None`` when the reply
        lacks that field.
    """
    request_headers = {
        'Host': 'www.gsxt.gov.cn',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': url_requests.random_userAgent(),
        'Referer': 'http://www.gsxt.gov.cn/index.html'
    }
    reply = url_requests.get(
        url='http://www.gsxt.gov.cn/SearchItemCaptcha',
        params={'v': int(time.time() * 1000)},
        headers=request_headers,
        proxies=proxies)

    payload = json.loads(reply.content)
    return payload.get('gt'), payload.get('challenge')