Exemplo n.º 1
0
def quit_empl():
    """Handle the employee-quit API (POST only).

    Expects JSON ``{'type': 'one'|..., 'data_id': id or [ids]}``.
    For ``type == 'one'`` a single employee is quit directly; otherwise
    ``data_id`` is treated as a list and each id is processed, collecting
    the ones that failed.

    :return: serialized Status JSON string.
    """
    if request.method == 'GET':
        # Only POST carries the JSON payload; reject GET outright.
        return Status(201, 'failure', u'quit_empl API请求方法错误', {}).json()

    try:
        req_json = request.get_json()
        _type = req_json.get('type')
        if _type in ['one', u'one']:
            return EmployeeService().quit_empl(req_json.get('data_id'))

        service = EmployeeService()  # reuse one service for the whole batch
        failed_ids = list()
        data_ids = req_json.get('data_id') or []
        for order_id in data_ids:
            if not order_id:
                continue
            res = json.loads(service.quit_empl(order_id))
            # status_id 100 means success; collect everything else.
            if res.get('status_id') != 100:
                failed_ids.append(str(res.get('data').get('data_id')))
        if failed_ids:
            return Status(201, 'failure', u'订单:%s删除失败' % ','.join(failed_ids),
                          {}).json()

        # Bug fix: the success response previously carried status text
        # 'failure' despite code 100 (success — cf. the 100/'success'
        # convention used elsewhere in this file).
        return Status(100, 'success',
                      u'订单:%s删除成功' % ','.join(str(i) for i in data_ids),
                      {}).json()
    except Exception as e:
        LOG.error("employee>quit_empl is error: %s" % e)
        return Status(101, 'failure', u'Server发生错误,获取失败', {}).json()
    def adds(self, datas):
        """Bulk-insert enterprise records.

        :param datas: iterable of dicts (or JSON strings) describing
                      enterprises; entries without a credit_code, or whose
                      credit_code already exists, are recorded as failures.
        :return: (success_list, failure_list) of enterprise names, or
                 None when *datas* is empty.
        """
        import json  # local import: needed for JSON-string entries

        if not datas:
            LOG.error('DB: data is null.')
            return

        failure_list = list()
        success_list = list()
        for data in datas:
            if not data:
                continue

            if isinstance(data, str):
                # Bug fix: dict(<str>) always raises ValueError; the
                # string payload is JSON and must be parsed instead.
                data = json.loads(data)
            credit_code = data.get('credit_code')
            name = data.get('name')
            if not credit_code:
                failure_list.append(name)
                continue
            model = self.enterprise_bo.get_by_code(credit_code)
            if model:
                # Duplicate credit_code — skip and record as failure.
                failure_list.append(name)
                continue

            new_model = self.enterprise_bo.new_mode()
            new_model.name = name
            new_model.phone = data.get('phone')
            new_model.email = data.get('email')
            new_model.tyc_url = data.get('tyc_url')
            new_model.company_url = data.get('company_url')
            new_model.address = data.get('address')
            new_model.register_funds = data.get('register_funds')
            new_model.paidin_funds = data.get('paidin_funds')
            new_model.establish_date = data.get('establish_date')
            new_model.status = data.get('status')
            new_model.credit_code = credit_code
            new_model.company_type = data.get('company_type')
            new_model.industry = data.get('industry')
            new_model.business_term = data.get('business_term')
            new_model.resume = data.get('resume')
            new_model.business_scope = data.get('business_scope')
            new_model.key = data.get('key')
            new_model.create_time = get_now()
            new_model.city = data.get('city')
            new_model.sub_city = data.get('sub_city')
            try:
                self.enterprise_bo.add_model(new_model)
                success_list.append(name)
            except Exception as e:
                LOG.error('DB add error %s: %s' % (e, str(data)))
                failure_list.append(name)
        # The original used for/else, but the loop never breaks, so the
        # summary always runs; plain statements are equivalent and clearer.
        if success_list:
            LOG.info('success list:【%s】' % len(success_list))
        if failure_list:
            LOG.info('failure list:【%s】' % len(failure_list))
        return success_list, failure_list
Exemplo n.º 3
0
def api_list_all():
    """List all employees (POST only).

    :return: serialized Status JSON string from the service, or an
             error Status on failure.
    """
    if request.method == 'GET':
        return Status(201, 'failure', u'api_list API请求方法错误', {}).json()

    try:
        # Renamed from `json` to avoid shadowing the json module
        # (used elsewhere in this file).
        req_json = request.get_json()
        res = EmployeeService().get_all(req_json)
    except Exception as e:
        LOG.error("employee>api_list is error: %s" % e)
        res = Status(101, 'failure', u'Server发生错误,获取失败', {}).json()
    return res
Exemplo n.º 4
0
def add_or_edit_api():
    """Create or update an employee record (POST only).

    :return: serialized Status JSON string from the service, or an
             error Status on failure.
    """
    if request.method == 'GET':
        return Status(201, 'failure', u'add_or_edit_api API请求方法错误', {}).json()

    try:
        # Renamed from `json` to avoid shadowing the json module
        # (used elsewhere in this file).
        req_json = request.get_json()
        res = EmployeeService().add_or_edit_empl(req_json)
    except Exception as e:
        LOG.error("employee>add_api is error: %s" % e)
        res = Status(101, 'failure', u'Server发生错误,新增失败', {}).json()
    return res
Exemplo n.º 5
0
def api_post(url, headers=None, data=None, retry=1, resptype='json', **kwargs):
    """
    http post
    :param url: url
    :param headers: headers dict (or a JSON string of one)
    :param data: request body; dicts are JSON-encoded before sending
    :param retry: current attempt number (internal; at most 3 retries)
    :param resptype: 'json' | 'raw' | 'content' | anything else -> text
    :return: (ok, payload) — payload is the response in the requested
             form on success, or an error message / empty list on failure
    """
    if not url:
        return False, 'api_post url is not allow null'

    # Bug fix: mutable default arguments ({}) were shared across calls;
    # use None sentinels instead.
    headers = {} if headers is None else headers
    data = {} if data is None else data

    if isinstance(data, dict):
        data = json.dumps(data)
    if not isinstance(headers, dict):
        # Bug fix: a non-dict headers value was json.dumps()ed into a
        # string, which requests rejects; parse it back into a dict.
        headers = json.loads(headers)

    try:
        if not IS_PROXY_RUN:
            response = requests.post(url=url,
                                     headers=headers,
                                     data=data,
                                     timeout=5)
        else:
            random_ip = get_random_proxy()
            proxies = {'http': random_ip} if random_ip else {}
            response = requests.post(url=url,
                                     headers=headers,
                                     data=data,
                                     timeout=5,
                                     proxies=proxies)
    except Exception as e:
        if retry <= 3:
            random_sleep(1, 1.5)
            # Bug fix: propagate the retried call's result — it was
            # previously dropped, so every retried request returned None.
            return api_post(url=url,
                            headers=headers,
                            data=data,
                            retry=retry + 1,
                            resptype=resptype)
        LOG.error(u'@@@@@ %s api_post error: %s' % (url, e))
        return False, []
    else:
        respcode = response.status_code
        if respcode != 200:
            return False, 'api_post response status code is: %s' % respcode
        elif respcode == 200 and resptype == 'raw':
            return True, response.raw
        elif respcode == 200 and resptype == 'content':
            return True, response.content
        elif respcode == 200 and resptype == 'json':
            return True, response.json()
        else:
            return True, response.text
Exemplo n.º 6
0
    def adds(self, datas):
        """Bulk-insert enterprise records.

        :param datas: iterable of dicts describing enterprises; entries
                      whose credit_code is missing or already stored are
                      recorded as failures.
        :return: (success_list, failure_list) of enterprise names, or
                 None when *datas* is empty.
        """
        if not datas:
            LOG.error('DB: data is null.')
            # Bug fix: previously fell through and iterated the null
            # value, raising TypeError on `for data in datas`.
            return

        failure_list = list()
        success_list = list()
        for data in datas:
            if not data:
                continue

            credit_code = data.get('credit_code')
            name = data.get('name')
            if not credit_code:
                # Robustness: avoid querying get_by_code(None).
                failure_list.append(name)
                continue
            model = self.enterprise_bo.get_by_code(credit_code)
            if model:
                # Duplicate credit_code — skip and record as failure.
                failure_list.append(name)
                continue

            new_model = self.enterprise_bo.new_mode()
            new_model.name = name
            new_model.phone = data.get('phone')
            new_model.email = data.get('email')
            new_model.tyt_url = data.get('tyt_url')
            new_model.company_url = data.get('company_url')
            new_model.address = data.get('address')
            new_model.register_funds = data.get('register_funds')
            new_model.paidin_funds = data.get('paidin_funds')
            new_model.establish_date = data.get('establish_date')
            new_model.status = data.get('status')
            new_model.credit_code = credit_code
            new_model.registration_number = data.get('registration_number')
            new_model.identification_number = data.get('identification_number')
            new_model.organization_code = data.get('organization_code')
            new_model.company_type = data.get('company_type')
            new_model.industry = data.get('industry')
            new_model.business_term = data.get('business_term')
            new_model.taxpayer_qualification = data.get(
                'taxpayer_qualification')
            new_model.personnel_size = data.get('personnel_size')
            new_model.insured_num = data.get('insured_num')
            new_model.resume = data.get('resume')
            new_model.registered_address = data.get('registered_address')
            new_model.business_scope = data.get('business_scope')
            self.enterprise_bo.add_model(new_model)
            success_list.append(name)
        # The original used for/else, but the loop never breaks, so the
        # summary always runs; plain statements are equivalent.
        print('success list【%s】:%s' %
              (len(success_list), ','.join(success_list)))
        print('failure list【%s】:%s' %
              (len(failure_list), ','.join(failure_list)))
        # Return the outcome lists so callers can inspect the result.
        return success_list, failure_list
Exemplo n.º 7
0
def upload_image():
    """Persist the posted avatar plus the profile form for the current user."""
    avatar = request.files.get('avatar')
    g.menuf = 'setter'
    g.menusub = 'user'
    try:
        res = SetterService().upload_info(avatar, request.form)
    except Exception as e:
        LOG.error("setter>upload_info is error: %s" % e)
        res = Status(101,
                     'failure',
                     u'Server发生错误,获取失败',
                     {}).json()
    LOG.info('%s update information' % get_user_id())
    return res
Exemplo n.º 8
0
    def upload_info(self, image_file, form):
        """Validate the submitted profile form, store the avatar (if any)
        under an MD5-derived filename, and update the current user's record.

        :param image_file: uploaded avatar file object, or None
        :param form: mapping of submitted profile fields
        :return: serialized Status JSON string
        """
        # Every (field, label) pair in request_list is a mandatory field.
        # Bug fix: dict.iteritems() is Python 2 only and raises
        # AttributeError on Python 3 (this codebase uses f-strings).
        for k, v in self.request_list.items():
            if not k or not v:
                continue

            if not form.get(k):
                return Status(202, 'failure', u'请完善%s信息在进行提交' % v, {}).json()
        db_image = ''
        if image_file:
            image_name = image_file.filename
            if not self.__allow_format_img(image_name):
                return Status(202, 'failure', u'图片格式支持:jpg、png、bmp、jpeg',
                              {}).json()

            _base_dir = get_base_dir()
            now_date = get_now(format="%Y-%m-%d")

            def __get_filename_by_md5(file_name):
                # Name the stored file by md5(now + original name) so
                # repeated uploads of the same file never collide.
                suffix = (os.path.splitext(file_name)[1]).lower()
                _v = get_now() + file_name
                return (md5(_v) + suffix)

            store_file_name = __get_filename_by_md5(image_name)
            # Relative path kept in the DB; absolute path used on disk.
            db_image = os.path.join((UPLOAD_BASE_DIR + now_date),
                                    store_file_name)
            store_dir = _base_dir + UPLOAD_BASE_DIR + now_date
            if not os.path.exists(store_dir):
                mk_dirs(store_dir)
            image_file.save(os.path.join(store_dir, store_file_name))

        try:
            user_mode = self.sysuser_bo.get_user_by_params(get_user_id())
            if db_image:
                user_mode.image = db_image
            ret_image = db_image if db_image else user_mode.image
            user_mode.fullname = form.get('fullname')
            user_mode.phone = form.get('phone')
            if form.get('email'):
                user_mode.email = form.get('email')
            self.sysuser_bo.merge_model(user_mode)
        except Exception as e:
            LOG.error("upload info is error: %s" % e)
            return Status(300, 'failure', u'upload info更新db失败', {}).json()

        return Status(100, 'success', u'信息完善成功!', {'image': ret_image}).json()
Exemplo n.º 9
0
    def _process_by_key(self):
        """Fan spider work out over a multiprocessing pool — one async task
        per page window per keyword — then drain the result queue and store.
        """
        # Leave one CPU free when more than two cores are available.
        pool = multiprocessing.Pool(processes=(MAX_CPU - 1 if MAX_CPU > 2 else 1))
        LOG.info('Main process: %s, run cpu count: %s' % (os.getpid(), (MAX_CPU - 1 if MAX_CPU > 2 else 1)))
        process = list()

        for key in self.keys:
            if not key:
                continue

            # Ask the client how many pages this keyword spans.
            min_page, max_page, max_pagination, max_range = self.tyc_client.get_pagination(key)
            LOG.info('[%s][%s]spider page: %s ~ %s ||| max_pagination: %s ||| max range: %s'
                     % (RUN_MODE, key, min_page, max_page, max_pagination, max_range))
            if not max_range:
                # Without a page range there is nothing sane to schedule.
                LOG.error("It's not have max range")
                sys.exit()

            # Split [min_page, max_pagination] into PAGINATION-sized
            # windows; each window becomes one pool task.
            for i in range(0, max_range, 1):
                max_page = min_page + PAGINATION
                if max_page > max_pagination:
                    max_page = max_pagination
                process.append(
                    pool.apply_async(self.tyc_client.work_by_key, args=(key, min_page, max_page,
                                                                        self.q, self.citys_list, self.sub_citys_mapping))
                )
                min_page = max_page + 1

        pool.close()
        pool.join()

        # Drain everything the workers queued.
        # NOTE(review): the bare except swallows queue.Empty races (and
        # everything else) — consider narrowing to queue.Empty.
        while 1:
            try:
                if self.q.empty():
                    break
                self.ret_res_list.append(self.q.get_nowait())
            except:
                pass

        self.to_store(self.keys, MIN_PAGE, MAX_PAGE)
    def work_by_key(self, key):
        """Crawl search-result pages for *key* and return company records."""
        results = []
        if not key:
            LOG.error("【%s】key is null, no work." % RUN_MODE)
            return results

        # Walk every result page up to the configured maximum.
        for page_no in range(self.MAX_PAGE):
            page_url = TYC_SEARCH_API + '/p%s?key=' % page_no + parse.quote(key)
            print(page_url)
            ok, html = api_get(url=page_url,
                               headers=self.headers,
                               data={},
                               resptype='text')
            if not ok:
                continue

            parsed = BeautifulSoup(html, 'lxml')
            anchors = parsed.find_all(
                'a', attrs={"tyc-event-ch": "CompanySearch.Company"})
            for anchor in anchors:
                if not anchor or not anchor.attrs.get('href'):
                    continue

                record = {
                    'tyt_url': anchor.get('href').strip(),
                    'name': anchor.get_text().strip(),
                }
                # Enrich the listing entry with its detail page.
                record.update(self.detail_by_url(record.get('tyt_url')))
                print(record['name'], record['tyt_url'],
                      str(True if record else False))
                results.append(record)
                random_sleep()
        return results
Exemplo n.º 11
0
    def to_excel(self, datas, columns: dict, exlname=None):
        """Export *datas* (iterable of dicts) to an .xls file.

        :param datas: rows; each dict is looked up by the keys of *columns*
        :param columns: mapping of data key -> column title
        :param exlname: target path; defaults to <excel_folder>/<now>.xls
        :return: the written filename, or False on invalid input
        """
        if not datas:
            LOG.error('to excel datas is null')
            return False
        if not columns:
            LOG.error('to excel columns is null')
            return False
        if not isinstance(columns, dict):
            LOG.error('to excel columns is need dict')
            return False
        if not exlname:
            exlname = os.path.join(get_excel_folder(), '%s.xls' % get_now())

        # Bug fix: work on a copy so the caller's dict is not mutated by
        # the injected 'ID' column.
        columns = dict(columns)
        header_keys = list(columns.keys())
        header_keys.insert(0, 'ID')
        columns.update({'ID': '序号'})

        def _make_style(bold):
            # Shared font setup for title/content cells (was duplicated).
            style = xlwt.XFStyle()
            font = xlwt.Font()
            font.name = 'Times New Roman'
            font.bold = bold
            font.color_index = 4
            font.height = 220
            style.font = font
            return style

        style_title = _make_style(True)
        style_content = _make_style(False)

        f = xlwt.Workbook(encoding='utf-8')
        sheet = f.add_sheet('sheet', cell_overwrite_ok=True)

        # Header row (标题)
        for i, key in enumerate(header_keys):
            sheet.write(0, i, columns.get(key), style_title)

        row = 1
        for line in datas:
            if not line:
                continue
            for index, key in enumerate(header_keys):
                if index == 0:
                    # First column is the 1-based row number.
                    sheet.write(row, index, row, style_title)
                else:
                    sheet.write(row, index, line.get(key), style_content)

            row += 1

        f.save(exlname)
        return exlname
Exemplo n.º 12
0
 def _is_not_max_range_die(self, max_range):
     """Abort the whole process when no page range could be determined."""
     if max_range:
         return
     LOG.error("It's not have max range")
     sys.exit()
Exemplo n.º 13
0
    def work_by_key(self, key):
        """Crawl Tianyancha search results for *key* and persist each company.

        For every result page: fetch the listing, fall back to a manual
        CAPTCHA-verification wait loop when no company anchors are parsed,
        re-fetch when the session looks logged-out, then insert each company
        row (via _insert) and fetch its detail page.

        :param key: search keyword; an empty key short-circuits to [].
        :return: list of the company dicts that were collected.
        """
        print(key, '@' * 100)
        ret_res = list()
        if not key:
            LOG.error("【%s】key is null, no work." % RUN_MODE)
            return ret_res

        # Probe page 1 (up to 9 attempts) to discover the total page count.
        is_page = False
        for ct in range(9):
            url = '%s/p%s?key=%s' % (TYC_SEARCH_API, 1, parse.quote(key))
            is_ok, search_resp = api_get(url=url,
                                         headers=self.headers,
                                         data={},
                                         resptype='text')
            self.headers['Cookie'] = cookies_get()
            if is_ok:
                is_page = True
                break
        # Default to 200 pages when the pagination footer cannot be read.
        page_vlas = 200
        if not is_page:
            page_vlas = 200
        else:
            et_late = etree.HTML(search_resp)
            # Second-to-last pager entry holds the last page number.
            page_num = [
                i.xpath('./li/a/text()')[-2] for i in et_late.xpath(
                    '//div[@class="result-footer"]/div[@class=" search-pager"]/ul'
                )
            ]
            if page_num:
                # NOTE(review): strips '.' from pager labels like '...98'
                # before int() below — confirm against live markup.
                page_vlas = str(page_num[0]).replace('.', '')

        LOG.critical(f'搜索关键词为:{key}, 总页面:{page_vlas}------------------------')
        print(f'搜索关键词为:{key}, 总页面:{page_vlas}------------------------')
        # Iterate the company listing pages.
        for page in range(1, int(page_vlas), 1):
            self.headers['Cookie'] = cookies_get()
            url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page, parse.quote(key))
            print(url, 'Q' * 80)
            is_ok, search_resp = api_get(url=url,
                                         headers=self.headers,
                                         data={},
                                         resptype='text')
            if not is_ok:
                continue
            soup = BeautifulSoup(search_resp, 'lxml')
            tags = soup.find_all(
                'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

            def while_req(url):
                # Refresh the session cookie and re-fetch the same page.
                self.headers['Cookie'] = cookies_get()
                sub_is_ok, sub_search_resp = api_get(url=url,
                                                     headers=self.headers,
                                                     data={},
                                                     resptype='text')
                return sub_is_ok, sub_search_resp

            # Debug dump of the first company-name cell on the page.
            HTNL = etree.HTML(search_resp)
            print(
                HTNL.xpath(
                    '//*[@id="web-content"]/div/div[1]/div[3]/div[2]/div[1]/div/div[3]/div[1]/a/text()'
                ), 'A' * 80)

            # Manual verification: block until company anchors reappear
            # (an operator is expected to solve the CAPTCHA meanwhile).
            if len(tags) == 0:
                while 1:
                    if is_ok and len(tags) > 0:
                        break
                    else:
                        print(url)
                        LOG.critical('验证############### %s ###############' %
                                     url)
                        random_sleep(20, 25)
                        self.headers['Cookie'] = cookies_get()
                        is_ok, search_resp = while_req(url)
                        soup = BeautifulSoup(search_resp, 'lxml')
                        tags = soup.find_all(
                            'a',
                            attrs={"tyc-event-ch": "CompanySearch.Company"})
            eto = etree.HTML(search_resp)
            # A visible user name in the nav bar means the session is logged in.
            user_name = eto.xpath('//div[@nav-type="user"]/a/text()')

            is_success = False
            for i in range(9):
                if not ''.join(user_name):
                    # Session looked logged-out: refresh cookie and re-fetch.
                    # NOTE(review): is_success only becomes True on this
                    # branch, so pages where the session already looked
                    # logged-in skip the tag loop below entirely — confirm
                    # this is intended.
                    self.headers['Cookie'] = cookies_get()
                    is_ok, search_resp = while_req(url)
                    soup = BeautifulSoup(search_resp, 'lxml')
                    tags = soup.find_all(
                        'a', attrs={"tyc-event-ch": "CompanySearch.Company"})
                    is_success = True
                    break
            if is_success:
                for tag in tags:
                    if not tag or not tag.attrs.get('href'):
                        continue

                    res_dict = dict()
                    res_dict['tyt_url'] = tag.get('href').strip()
                    res_dict['name'] = tag.get_text().strip()
                    res_dict['company_id'] = str(
                        tag.get('href')).split('/')[-1]
                    res_dict['label_index'] = str(key)
                    res_dict['rquest_url'] = url
                    res_dict['source'] = '天眼查'
                    res_dict['created_time'] = str(datetime.now())
                    result = _insert(res_dict)
                    if result.get('status', False):
                        c_id = str(result.get('_id'))
                        try:
                            # Detail fetch is retried once; failures ignored.
                            self.detail_by_url(res_dict.get('tyt_url'), c_id)
                        except:
                            try:
                                self.detail_by_url(res_dict.get('tyt_url'),
                                                   c_id)
                            except:
                                pass

                    ret_res.append(res_dict)
                    random_sleep(1, 2.5)
        return ret_res
Exemplo n.º 14
0
    def work_by_key(self, key):
        """Crawl search-result pages for *key*, saving each company link.

        Writes the raw listing HTML to company_list.html for debugging,
        falls back to a manual-verification wait loop when no company
        anchors are parsed, and records every company via self.save_list.

        :param key: search keyword; an empty key returns [] immediately.
        :return: list of {'tyt_url', 'name'} dicts.
        """
        ret_res = list()
        if not key:
            LOG.error("【%s】key is null, no work." % RUN_MODE)
            return ret_res

        # page loop
        for page in range(1, self.MAX_PAGE, 1):
            url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page, parse.quote(key))
            print(url)
            print(cookies_get())
            self.headers['Cookie'] = cookies_get()
            is_ok, search_resp = api_get(url=url,
                                         headers=self.headers,
                                         data={},
                                         resptype='text')
            if not is_ok:
                continue

            # Keep a copy of the latest listing page for debugging.
            with open('company_list.html', 'w', encoding='utf-8') as wf:
                wf.write(search_resp)
            soup = BeautifulSoup(search_resp, 'lxml')
            tags = soup.find_all(
                'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

            def while_req(url):
                sub_is_ok, sub_search_resp = api_get(url=url,
                                                     headers=self.headers,
                                                     data={},
                                                     resptype='text')
                return sub_is_ok, sub_search_resp

            # Manual verification: block until company anchors reappear.
            if len(tags) == 0:
                while 1:
                    if is_ok and len(tags) > 0:
                        break
                    else:
                        LOG.critical('验证############### %s ###############' %
                                     url)
                        random_sleep(20, 25)
                        self.headers['Cookie'] = cookies_get()
                        is_ok, search_resp = while_req(url)
                        soup = BeautifulSoup(search_resp, 'lxml')
                        tags = soup.find_all(
                            'a',
                            attrs={"tyc-event-ch": "CompanySearch.Company"})

            for tag in tags:
                if not tag or not tag.attrs.get('href'):
                    continue

                res_dict = dict()
                res_dict['tyt_url'] = tag.get('href').strip()
                res_dict['name'] = tag.get_text().strip()

                self.save_list(
                    tag.get('href').strip() + '-' + tag.get_text().strip())
                print(res_dict)
                ret_res.append(res_dict)
                random_sleep(1, 2.5)
        # Bug fix: return the collected results — the function previously
        # ended without a return and always yielded None, unlike the other
        # work_by_key variants in this file.
        return ret_res
Exemplo n.º 15
0
    def _build_search_url(self, key, page, type, cid, city_info,
                          sub_city_info):
        """Compose the listing URL for one (API_MODE, type) combination.

        Exits the process when API_MODE is neither 'tyc' nor 'pro',
        matching the previous inline behaviour.
        """
        if API_MODE == 'tyc' and type == 'default':
            return '%s/p%s?key=%s' % (TYC_SEARCH_API, page,
                                      parse.quote(key))
        if API_MODE == 'tyc' and type == 'city':
            return '%s/p%s?key=%s&base=%s' % (TYC_SEARCH_API, page,
                                              parse.quote(key),
                                              city_info.get('name'))
        if API_MODE == 'tyc' and type == 'sub_city':
            if cid in ZXS_CITY_IDS:
                # Municipalities need an explicit district areaCode.
                return '%s/p%s?key=%s&base=%s&areaCode=%s' % (
                    TYC_SEARCH_API, page, parse.quote(key),
                    sub_city_info.get('name'), sub_city_info.get('code'))
            return '%s/p%s?key=%s&base=%s' % (TYC_SEARCH_API, page,
                                              parse.quote(key),
                                              sub_city_info.get('name'))
        if API_MODE == 'pro' and type == 'default':
            return '%s/p%s?key=%s' % (TYC_PRO_SEARCH_API, page,
                                      parse.quote(key))
        if API_MODE == 'pro' and type == 'city':
            return '%s/p%s?key=%s&base=%s' % (TYC_PRO_SEARCH_API, page,
                                              parse.quote(key),
                                              city_info.get('name'))
        if API_MODE == 'pro' and type == 'sub_city':
            if cid in ZXS_CITY_IDS:
                return '%s/p%s?key=%s&base=%s&areaCode=%s&baseArea=%s' \
                       % (TYC_PRO_SEARCH_API, page, parse.quote(key),
                          city_info.get('name'), sub_city_info.get('code'),
                          parse.quote(sub_city_info.get('name')))
            return '%s/p%s?key=%s&base=%s' % (TYC_PRO_SEARCH_API, page,
                                              parse.quote(key),
                                              sub_city_info.get('name'))
        LOG.critical('====== API_MODE is not in [tyc, pro] ======')
        sys.exit(1)

    def work_by_key(self,
                    key,
                    min_page,
                    max_page,
                    type='default',
                    queue=None,
                    cid=None,
                    sub_cid=None,
                    city_info=None,
                    sub_city_info=None):
        """Crawl listing pages [min_page, max_page] for *key*.

        :param key: search keyword; empty key returns [] immediately.
        :param min_page: first page (inclusive).
        :param max_page: last page (inclusive).
        :param type: 'default' | 'city' | 'sub_city' query scope.
        :param queue: optional multiprocessing queue to mirror results to.
        :param cid: city id, used to detect municipalities (ZXS_CITY_IDS).
        :param sub_cid: sub-city id (kept for interface compatibility;
                        not used here).
        :param city_info: dict with the city's 'name'/'full_name'.
        :param sub_city_info: dict with the district's 'name'/'code'/'full_name'.
        :return: list of enriched company dicts.
        """
        ret_res = list()
        if not key:
            LOG.error("【%s】key is null, no work." % RUN_MODE)
            return ret_res

        # page loop
        for page in range(min_page, max_page + 1, 1):
            # URL construction extracted into _build_search_url for clarity.
            url = self._build_search_url(key, page, type, cid, city_info,
                                         sub_city_info)
            LOG.info('%s[%s]%s' % (key, API_MODE, url))

            self.headers['Referer'] = url
            is_ok, search_resp = api_get(url=url,
                                         headers=self.headers,
                                         data={},
                                         resptype='text')
            if not is_ok:
                continue
            if self.check_no(url, _type='page'):
                continue

            soup = BeautifulSoup(search_resp, 'lxml')
            tags = soup.find_all(
                'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

            def while_req(url):
                sub_is_ok, sub_search_resp = api_get(url=url,
                                                     headers=self.headers,
                                                     data={},
                                                     resptype='text')
                return sub_is_ok, sub_search_resp

            # Manual verification: block until company anchors reappear.
            if len(tags) == 0:
                while 1:
                    if is_ok and len(tags) > 0:
                        break
                    else:
                        LOG.critical('验证############### %s ###############' %
                                     url)
                        random_sleep(20, 25)
                        is_ok, search_resp = while_req(url)
                        soup = BeautifulSoup(search_resp, 'lxml')
                        tags = soup.find_all(
                            'a',
                            attrs={"tyc-event-ch": "CompanySearch.Company"})

            for tag in tags:
                if not tag or not tag.attrs.get('href'):
                    continue

                res_dict = dict()
                if API_MODE == 'tyc':
                    tyc_url = tag.get('href').strip()
                elif API_MODE == 'pro':
                    tyc_url = '%s%s/background' % (TYC_PRO_DETAIL_API,
                                                   tag.get('href').strip())
                else:
                    tyc_url = ''
                res_dict['tyc_url'] = tyc_url
                res_dict['name'] = tag.get_text().strip()
                res_dict['key'] = key
                res_dict['is_send_email'] = False
                res_dict['city'] = city_info.get(
                    'full_name') if city_info else '-'
                res_dict['sub_city'] = sub_city_info.get(
                    'full_name') if sub_city_info else '-'
                detail_res = list()
                if API_MODE == 'tyc':
                    detail_res = self.detail_by_url(res_dict.get('tyc_url'))
                elif API_MODE == 'pro':
                    detail_res = self.detail_pro_by_url(
                        res_dict.get('tyc_url'))
                res_dict.update(detail_res)
                print('%s[%s] %s' %
                      (res_dict['name'], str(True if res_dict else False),
                       res_dict['tyc_url']))
                ret_res.append(res_dict)
                if queue:
                    queue.put(res_dict)
                random_sleep(3.5, 4.5)
                if IS_TEST_BREAK:
                    break
            if IS_TEST_BREAK:
                break
        return ret_res
Exemplo n.º 16
0
    def work_by_key(self, key, min_page=0, max_page=5, queue=None):
        """Crawl listing pages [min_page, max_page) for *key*; mirror every
        company record onto *queue* (when given) and return them all."""
        collected = list()
        if not key:
            LOG.error("【%s】key is null, no work." % RUN_MODE)
            return collected

        # Falsy page bounds fall back to the instance defaults.
        min_page = min_page or self.MIN_PAGE
        max_page = max_page or self.MAX_PAGE

        LOG.info('%s[%s ~ %s]' % (key, min_page, max_page))
        for page in range(min_page, max_page):
            if API_MODE == 'tyc':
                url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page,
                                         parse.quote(key))
            elif API_MODE == 'pro':
                url = '%s/p%s?key=%s' % (TYC_PRO_SEARCH_API, page,
                                         parse.quote(key))
            else:
                LOG.critical('====== API_MODE is not in [tyc, pro] ======')
                sys.exit(1)
            LOG.info('%s[%s]%s' % (key, API_MODE, url))

            ok, body = api_get(url=url,
                               headers=self.headers,
                               data={},
                               resptype='text')

            if not ok:
                continue

            page_soup = BeautifulSoup(body, 'lxml')
            company_links = page_soup.find_all(
                'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

            def _refetch(target):
                got_ok, got_body = api_get(url=target,
                                           headers=self.headers,
                                           data={},
                                           resptype='text')
                return got_ok, got_body

            # Manual verification: keep re-fetching until anchors appear.
            while not (ok and len(company_links) > 0):
                LOG.critical('验证############### %s ###############' %
                             url)
                random_sleep(20, 25)
                ok, body = _refetch(url)
                page_soup = BeautifulSoup(body, 'lxml')
                company_links = page_soup.find_all(
                    'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

            for link in company_links:
                if not link or not link.attrs.get('href'):
                    continue

                if API_MODE == 'tyc':
                    company_url = link.get('href').strip()
                elif API_MODE == 'pro':
                    company_url = '%s%s/background' % (TYC_PRO_DETAIL_API,
                                                       link.get('href').strip())
                else:
                    company_url = ''
                record = dict()
                record['tyc_url'] = company_url
                record['name'] = link.get_text().strip()
                record['key'] = key
                details = list()
                if API_MODE == 'tyc':
                    details = self.detail_by_url(record.get('tyc_url'))
                elif API_MODE == 'pro':
                    details = self.detail_pro_by_url(record.get('tyc_url'))
                record.update(details)
                print('%s[%s] %s' %
                      (record['name'], str(True if record else False),
                       record['tyc_url']))
                collected.append(record)
                if queue:
                    queue.put(record)
                random_sleep(3.2, 4.5)
                if IS_TEST_BREAK:
                    break
            if IS_TEST_BREAK:
                break
        return collected
Exemplo n.º 17
0
 def not_found_error(error):
     """Log the missing URL and render the 404 error page."""
     LOG.error("%s is not found 404" % request.url)
     return render_template('errors/404.html'), 404
Exemplo n.º 18
0
 def server_error(error):
     """Log the failing URL and render the 500 error page."""
     LOG.error("%s is server error 500" % request.url)
     return render_template('errors/500.html'), 500