Пример #1
0
def get_basic_hidden_info(shop_id):
    """
    Fetch a shop's basic hidden info (name, address, phone numbers).

    The ``basicHideInfo`` AJAX endpoint is requested through ``requests_util``
    and retried until it answers with code 200 or the retry budget is spent.

    @param shop_id: shop identifier; must be as long as 'H2noKWCDigM0H9c1'.
    @return: dict with the keys '店铺id', '店铺名', '店铺地址' and '店铺电话'.
    @raise SystemExit: when no code-200 response was obtained within five
        attempts (a warning is logged first).
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?' \
          'shopId=' + str(shop_id) + \
          '&_token=' + str(get_token(shop_url)) + \
          '&tcv=' + str(spider_config.TCV) + \
          '&uuid=' + str(spider_config.UUID) + \
          '&platform=1' \
          '&partner=150' \
          '&optimusCode=10' \
          '&originUrl=' + str(shop_url)

    # Work around requests that intermittently fail: retry a bounded number
    # of times. Every failed attempt counts, including unparsable responses,
    # so the loop can no longer spin forever.
    attempts_left = 5
    while True:
        attempts_left -= 1
        r = requests_util.get_requests(url, request_type='proxy, no cookie')
        r_text = requests_util.replace_json_text(r.text, get_font_msg())
        try:
            r_json = json.loads(r_text)
            # Anything other than 200 (e.g. a captcha response) is retried.
            if r_json['code'] == 200:
                break
        except (ValueError, KeyError, TypeError):
            # Body was not JSON, or not the expected object shape.
            pass
        if attempts_left <= 0:
            logger.warning('替换tsv和uuid')
            raise SystemExit

    # The loop above only exits normally on code 200, so no other response
    # code can reach this point.
    msg = r_json['msg']['shopInfo']

    def _text(fragment):
        # Strip markup from an HTML fragment; a missing field becomes ''.
        return '' if fragment is None else BeautifulSoup(fragment, 'lxml').text

    # Each field is converted on its own and then combined, so the second
    # field is no longer dropped whenever the first one is present.
    shop_address = _text(msg['address']) + _text(msg['crossRoad'])
    phone_numbers = [
        number
        for number in (_text(msg['phoneNo']), _text(msg['phoneNo2']))
        if number
    ]
    return {
        '店铺id': shop_id,
        '店铺名': msg['shopName'],
        '店铺地址': shop_address,
        '店铺电话': ', '.join(phone_numbers)
    }
Пример #2
0
 def get_proxy(self):
     """
     Take one proxy from the pool, refilling the pool first if it is empty.

     Only the "http_extract" mode is implemented: the proxy list is fetched
     from the configured ``http_link`` and every entry is queued
     ``repeat_nub`` times so it can be reused.

     Returns the value of ``self.http_proxy_utils(ip, port)`` for the proxy
     taken from the front of the pool, or None when no implemented
     extraction mode is enabled. Exits the process when ``repeat_nub``
     cannot be read as an integer.
     """
     try:
         repeat_nub = int(global_config.getRaw('proxy', 'repeat_nub'))
     except Exception:
         # Narrower than a bare except: Ctrl-C and SystemExit are no longer
         # reported as a configuration problem.
         logger.warning('repeat_nub 格式不正确,应为正整数')
         sys.exit()
     # HTTP extraction mode
     if global_config.getRaw('proxy', 'http_extract') == '1':
         # Pool is empty: fetch a fresh batch of proxies.
         if len(self.proxy_pool) == 0:
             proxy_url = global_config.getRaw('proxy', 'http_link')
             r = requests.get(proxy_url)
             for proxy in r.json():
                 # Queue each proxy several times so it is used repeatedly.
                 for _ in range(repeat_nub):
                     self.proxy_pool.append([proxy['ip'], proxy['port']])
         # Consume the proxy at the front of the pool.
         ip, port = self.proxy_pool.pop(0)
         return self.http_proxy_utils(ip, port)
     # Key extraction mode
     elif global_config.getRaw('proxy', 'key_extract') == '1':
         # Not implemented yet.
         pass
     return None
Пример #3
0
def send_webhook_message(channel_webhook_url, json_content, retries=3):
    """
    Send a message to the specified channel via a webhook.

    When Discord is disabled in the configuration nothing is sent and the
    call is reported as successful.

    :param channel_webhook_url: full URL for the receiving webhook
    :param json_content: dictionary containing data to send (usually "content" or "embed" keys)
    :param retries: number of times to attempt to send message again if it fails
    :return: True if message was successfully sent, False otherwise
    """

    if not config_loader.DISCORD["enabled"]:
        return True

    attempt = 0
    while attempt <= retries:
        try:
            response = requests.post(channel_webhook_url, json=json_content)

            if response.status_code in (200, 204):
                return True

            logger.warning(
                f"Webhook response {response.status_code}: {response.text}")
        except Exception:
            logger.exception(
                "Unexpected error while attempting to send webhook message.")

        attempt += 1
        # Only wait when another attempt follows; sleeping after the final
        # failure would just delay reporting it.
        if attempt <= retries:
            time.sleep(5)

    logger.error(f"Unable to send webhook message, content: {json_content}")
    return False
Пример #4
0
def post_view(request):
    """
    Render the detail page of the post given by the ``id`` query parameter.

    The post dict is enriched with the author's nickname and profile image
    (when a social account exists for the author) and with its comments.
    Redirects to '/post' when no post matches the id.
    """
    post_id = request.GET.get('id')
    user_info = get_user_info(request)
    post = Post.objects.filter(id=post_id).values().last()
    if post is None:
        error_msg = '[post_id:{}] invalid access.'.format(post_id)
        logger.warning(error_msg)
        return redirect('/post')

    # filter().first() yields None when the author has no social account;
    # objects.get() would raise DoesNotExist and make the guard below dead.
    social_profile = SocialAccount.objects.filter(
        user_id=post.get('user_id')).first()
    if social_profile:
        account_data = social_profile.extra_data or {}
        account_property = account_data.get('properties') or {}
        post['user_id'] = social_profile.user_id
        post['nickname'] = account_property.get('nickname')
        profile_image = account_property.get('profile_image')
        if profile_image:
            # Upgrade only a plain-http scheme; a blanket replace would turn
            # 'https://' into 'httpss://'.
            if profile_image.startswith('http://'):
                profile_image = 'https://' + profile_image[len('http://'):]
            post['profile_image'] = profile_image
        else:
            post['profile_image'] = '/static/images/none_profile.png'

    # Comments
    comment_list = list(Comment.objects.filter(post_id=post.get('id')).order_by('created_at').values())
    post['comment_list'] = get_comment_list(comment_list)

    return render(request, 'main/view.html', dict(post=post, user_info=user_info))
Пример #5
0
    def run(self):
        """Main loop of the class. Calls a brain action every TIME_CYCLE.

        Runs until ``self.kill_event`` is set. While ``self.stop_event`` is
        set the brain is not executed, but the loop keeps its pace. Each
        iteration is padded with a sleep so that it lasts at least
        TIME_CYCLE milliseconds.
        """
        while not self.kill_event.is_set():
            start_time = datetime.now()
            if not self.stop_event.is_set():
                try:
                    self.brains.active_brain.execute()
                except AttributeError as e:
                    logger.warning('No Brain selected')
                    logger.error(e)

            # Time spent in this iteration, in milliseconds.
            elapsed_ms = (datetime.now() - start_time).total_seconds() * 1000.0
            if elapsed_ms < TIME_CYCLE:
                time.sleep((TIME_CYCLE - elapsed_ms) / 1000.0)
        logger.debug('Pilot: pilot killed.')
Пример #6
0
 def do(self, action_name, *args, **kwargs):
     """
     Invoke the ``action_<action_name>`` method of this manager.

     Positional and keyword arguments are forwarded to the action. When no
     such method exists a warning is logged and nothing is called. The
     action's return value is not propagated.
     """
     method_name = 'action_' + action_name
     if not hasattr(self, method_name):
         logger.warning(
             f'no action named "{action_name}" for action manager of type {type(self).__name__}'
         )
         return
     getattr(self, method_name)(*args, **kwargs)
Пример #7
0
def send_payments_details(payments: List[Payment],
                          recipients: List[str]) -> None:
    """
    Email a CSV with the details of the given payments.

    Nothing is sent when every payment currently has an ERROR status. A
    failure of the mail service is logged, not raised.

    :param payments: payments to report
    :param recipients: email addresses that receive the CSV
    :raises Exception: when ``recipients`` is empty
    """
    if not recipients:
        raise Exception(
            '[BATCH][PAYMENTS] Missing PASS_CULTURE_PAYMENTS_DETAILS_RECIPIENTS in environment variables'
        )

    all_in_error = all(
        payment.currentStatus.status == TransactionStatus.ERROR
        for payment in payments)
    if all_in_error:
        logger.warning(
            '[BATCH][PAYMENTS] Not sending payments details as all payments have an ERROR status'
        )
        return

    details = create_all_payments_details(payments)
    csv = generate_payment_details_csv(details)
    logger.info('[BATCH][PAYMENTS] Sending %s details of %s payments' %
                (len(details), len(payments)))
    logger.info('[BATCH][PAYMENTS] Recipients of email : %s' % recipients)
    try:
        send_payment_details_email(csv, recipients, send_raw_email)
    except MailServiceException as e:
        # The exception needs a placeholder; passing it as a bare extra
        # argument makes the log record fail to format.
        logger.error(
            '[BATCH][PAYMENTS] Error while sending payment details email to MailJet: %s',
            e)
Пример #8
0
def get_promo_info(shop_id):
    """
    Coupon information.

    NOTE(review): despite the name, this requests the ``reviewAndStar``
    endpoint and reads the ``shopInfo`` fields of the response - confirm the
    intended endpoint.

    @param shop_id: shop identifier; must be as long as 'H2noKWCDigM0H9c1'.
    @return: ``[shop_name, shop_address, shop_number]`` on code 200; None
        after a captcha prompt (code 406) or on any other response code.
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    # The scheme previously read 'ttp://', which is not a valid URL.
    url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId=' + str(
        shop_id) + '&cityId=19&mainCategoryId=2821&_token=' + str(get_token(
        shop_url)) + '&uuid=38af1c67-4a50-3220-06f6-bf9f16e71c41.1611146098&platform=1&partner=150&optimusCode=10' \
                     '&originUrl=' + shop_url
    r = requests_util.get_requests(url, request_type='json')
    r_text = requests_util.replace_json_text(r.text, get_font_msg())
    r_json = json.loads(r_text)

    def _text(fragment):
        # Strip markup from an HTML fragment; a missing field becomes ''.
        return '' if fragment is None else BeautifulSoup(fragment, 'lxml').text

    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        # The URL needs a placeholder, otherwise the record fails to format
        # and the URL is never shown.
        logger.warning('处理验证码,按任意键继续:%s', verify_page_url)
        input()
    elif r_json['code'] == 200:
        msg = r_json['msg']['shopInfo']
        shop_name = msg['shopName']
        shop_address = _text(msg['address']) + _text(msg['crossRoad'])
        shop_number = _text(msg['phoneNo']) + _text(msg['phoneNo2'])
        return [shop_name, shop_address, shop_number]
    else:
        logger.warning('json响应码异常,尝试更改提pr,或者提issue')
Пример #9
0
    def go_to_next_page(self):
        """
        Advance the result list to the next page.

        First scrolls to the bottom and clicks the "next" button. If that
        fails, falls back to clicking the index of the following page,
        unless the pager reports that the last page is already shown.

        Raises ProxyBlockedException when the fallback fails as well.
        """
        try:
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            next_btn = find_next_button(self.driver)
            next_btn.click()
            logger.info('[{}] [UrlCrawler] Click next button'.format(
                self.city))
        except Exception:
            # Next button not found (or not clickable): try the page index.
            # `except Exception` lets Ctrl-C / SystemExit propagate instead
            # of being turned into a ProxyBlockedException.
            try:
                logger.warning(
                    '[{}] [UrlCrawler] Next button not found, try to click page index'
                    .format(self.city))

                current_page_elm = find_paging_elm(self.driver)
                total_page = current_page_elm.get_attribute('data-totalpage')
                current_page = current_page_elm.get_attribute('data-curpage')
                if total_page == current_page:
                    logger.warning(
                        '[{}] [UrlCrawler] Alreay at last page'.format(
                            self.city))
                    return None
                elm = find_paging_elm_index(self.driver,
                                            int(current_page) + 1)
                elm.click()
                logger.info(
                    '[{}] [UrlCrawler] Click page index success'.format(
                        self.city))
            except Exception as exc:
                # Neither navigation path worked: treat the proxy as blocked.
                raise ProxyBlockedException() from exc
Пример #10
0
def array_min_max_norm(arr_src: np.ndarray,
                       prm_file_name: str,
                       do_fit: bool = True) -> np.ndarray:
    '''
    Scale an array with MinMaxNorm, fitting or reusing stored parameters.

    arr_scaled = array_min_max_norm(arr_src, 'test1.pkl', True)
    arr_scaled = array_min_max_norm(arr_src, 'test2.pkl', False)

    Params
    arr_src: source np.array
    prm_file_name: parameter file name
    do_fit: fit the parameters on arr_src (and save them) if True, otherwise
        load them from the file. A missing file forces a fit.
    Returns
    arr_scaled: result of the MinMaxNorm transform
    '''
    scaler = MinMaxNorm(prm_file_name)
    param_file_missing = not os.path.exists(prm_file_name)
    if param_file_missing:
        logger.warning('parame file %s does not exist. do fit', prm_file_name)

    if do_fit or param_file_missing:
        logger.info('fit and transform')
        scaled = scaler.fit_transform(arr_src)
        scaler.save_param()
        return scaled

    logger.info('transform using parameters from file')
    scaler.load_param()
    return scaler.transform(arr_src)
Пример #11
0
 def popUp(self, text=None, move=True):
     """
     Show the dialog and return the entered label text.

     ``text`` pre-fills the edit field; when it is None the text already in
     the field is kept. With ``move`` set, the dialog is moved to the cursor
     position. Returns the edit field's text if ``exec_()`` is truthy,
     otherwise None.
     """
     label_list = self.labelList
     if self._fit_to_content['row']:
         min_height = label_list.sizeHintForRow(0) * label_list.count() + 2
         label_list.setMinimumHeight(min_height)
     if self._fit_to_content['column']:
         min_width = label_list.sizeHintForColumn(0) + 2
         label_list.setMinimumWidth(min_width)

     # Keep the previous label in the edit field when no text is given.
     if text is None:
         text = self.edit.text()
     self.edit.setText(text)
     self.edit.setSelection(0, len(text))

     # Pre-select the matching list entry, if there is one.
     matches = label_list.findItems(text, QtCore.Qt.MatchFixedString)
     if matches:
         if len(matches) != 1:
             logger.warning("Label list has duplicate '{}'".format(text))
         first_match = matches[0]
         label_list.setCurrentItem(first_match)
         self.edit.completer().setCurrentRow(label_list.row(first_match))

     self.edit.setFocus(QtCore.Qt.PopupFocusReason)
     if move:
         self.move(QtGui.QCursor.pos())
     if self.exec_():
         return self.edit.text()
     return None
Пример #12
0
    def save(self, param: Any):
        '''
        Pickle param into the configured parameter file.

        Nothing is written when param or the file name is None. An existing
        file is overwritten (with a warning) and a missing parent directory
        is created. Errors from opening or writing the file propagate.
        Params
        param: variable to save
        '''
        if param is None:
            logger.error('param is None')
            return

        target = self._param_file_name
        if target is None:
            logger.warning('file name is None')
            return

        if os.path.exists(target):
            logger.warning('%s exists. overwriting.' % target)

        # A bare file name has no directory part: use the current directory.
        parent = os.path.dirname(target)
        if parent == '':
            parent = '.'
        if not os.path.exists(parent):
            os.makedirs(parent)
            logger.info('%s created.' % parent)

        with open(target, 'wb') as out:
            pickle.dump(param, out)
            logger.info('%s saved.' % target)
Пример #13
0
    def build_jobs_table(self) -> Union[None, pd.DataFrame]:
        """ Builds the table of jobs according to search terms criteria

        When search terms are set, only the rows whose processed 'text'
        matches them are kept. The returned frame is a new object with a
        blank ('') index on every row; the frame obtained from
        ``_get_dataframe`` is left unmodified.

        Returns None when no dataframe is available.
        """

        df = self._get_dataframe()
        if df is None:
            logger.warning('dataframe is None')
            return None
        logger.info('Got the dataframe')

        if self._terms is not None:
            # Keep the helper series local instead of adding (and later
            # dropping) temporary columns on the source dataframe.
            text_processed = df['text'].apply(self._text_process)
            contains_terms = text_processed.apply(
                lambda x: self._search_terms(terms=self._terms, text=x))
            filtered_df = df.loc[contains_terms == True].reset_index(  # noqa: E712
                drop=True)
        else:
            # reset_index returns a new frame; its result must be used, and
            # working on the copy keeps the source index intact.
            filtered_df = df.reset_index(drop=True)

        # Blank out the index labels so they are hidden when displayed.
        filtered_df.index = [''] * len(filtered_df)

        return filtered_df
Пример #14
0
    def post(self, request):
        """
        Create a comment on the article identified by the posted slug.

        The request body must be a JSON object with the keys 'slug',
        'username', 'email', 'website' and 'comment'.

        Returns ParamInvalidResponse for a malformed body,
        ObjectNotExistResponse when the article is unknown, and
        SuccessResponse once the comment is stored.
        """

        def _reject():
            # errors='replace' keeps the log call from failing on bodies
            # that are not valid UTF-8.
            logger.warning('param invalid|%s',
                           request.body.decode('utf-8', errors='replace'))
            return ParamInvalidResponse()

        try:
            body = json.loads(request.body)
            slug = body['slug']
            username = body['username']
            email = body['email']
            website = body['website']
            content = body['comment']
        except (KeyError, TypeError, ValueError):
            # ValueError covers JSONDecodeError and undecodable bytes;
            # TypeError covers valid JSON that is not an object.
            return _reject()

        try:
            # `client_os` avoids shadowing the `os` module name.
            ip, browser, client_os, _ = get_client_info(request)
        except KeyError:
            return _reject()

        try:
            article = Article.objects.get(slug=slug)
        except Article.DoesNotExist:
            logger.warning('article not exist|%s', slug)
            return ObjectNotExistResponse()

        comment = Comment.objects.create(article=article,
                                         username=username,
                                         email=email,
                                         avatar=Gravatar(email).get_image(),
                                         website=website,
                                         content=content,
                                         publish_dt=datetime.now(),
                                         ipv4=ip,
                                         browser=browser,
                                         os=client_os)

        logger.info('add comment|%s|%s|%s', article.slug, article.title,
                    comment.username)

        return SuccessResponse()
Пример #15
0
def get_auth_token():
    """
    Validate the posted credentials and issue an auth token.

    The JSON payload must contain 'user', 'password' and 'app'; otherwise
    the request is aborted with HTTP 400.

    Returns the ``success(...)`` response carrying the token when the
    credentials are valid, otherwise the ``warn(...)`` response.
    """
    # Validate payload: it must be a JSON object holding every required key.
    content = request.json
    required_keys = ('user', 'password', 'app')
    missing = [
        key for key in required_keys
        if not isinstance(content, dict) or key not in content
    ]
    if missing:
        logger.debug(
            'Something is missing from your payload.  Double check everything is there'
        )
        # Log only the missing key names, never the payload (it may hold
        # the password).
        logger.debug('missing keys: %s', missing)
        abort(400)

    # Validate credentials
    user_dict = {'user': content['user'], 'password': content['password']}
    response = app_creds.validate_application_credentials(
        content['app'], user_dict)
    status_code = response['code']

    # Generate Token
    if response['success']:
        logger.info('User %s authenticated, generating token' %
                    content['user'])
        token = auth_token.generate_auth_token(content['user'])
        response = {
            'code': status_code,
            'success': True,
            'message': {
                'token': token.decode('ascii')
            }
        }
        return success('/auth/token/generate', response)

    logger.warning('User %s not authenticated, reporting error...' %
                   content['user'])
    return warn('authGen', response)
Пример #16
0
    def _build_urls(self) -> None:
        """ Gets the links for num_jobs of jobs

        Result pages are requested in steps of 10 via the ``start`` query
        parameter and the job links found on each page are appended to
        ``self._urls``.
        """

        if self._sort_by == 'relevance':
            base_url = f'https://il.indeed.com/jobs?q={self._query}&l=israel&start='
        else:  # by date
            base_url = f'https://il.indeed.com/jobs?q={self._query}&l=israel&sort=date&start='

        if self._num_recent_jobs is not None:
            num_jobs = (
                (self._num_recent_jobs // 15) * 10)  # whole numbers of 15
        else:
            num_jobs = self._get_num_jobs(self._get_main_soup(base_url + '0'))

        # get the links
        # Pre-bound so the check below is safe when the range is empty
        # (num_jobs <= 0) and the loop body never runs.
        last_start = None
        for last_start in tqdm.tqdm(range(0, num_jobs, 10),
                                    desc='Getting links:',
                                    ascii=True):
            soup = self._get_job_soup(base_url + str(last_start))
            self._urls.extend(self._get_jobs_links(soup))

        if last_start is not None and last_start == num_jobs - 10:
            # finished getting all the links
            logger.info('Got all the links for the jobs searched')
        else:
            logger.warning(
                f'Got {len(self._urls)} links for the jobs searched')
Пример #17
0
def update_component_token(request):
    """Periodically refresh the component_access_token.

    The token is valid for 2 hours and this view is meant to be triggered
    every 10 minutes. The token is only refreshed when its remaining TTL is
    at most 11 minutes.

    Returns an HttpResponse describing the outcome; status 500 when the API
    answer carries no token.
    """
    api = 'https://api.weixin.qq.com/cgi-bin/component/api_component_token'

    ttl = global_store.ttl_component_access_token()
    if ttl > 11 * 60:
        # No refresh needed: 11 rather than 10 minutes leaves room for one
        # failed attempt.
        logger.debug('无需更新 component_token|%d', ttl)
        return HttpResponse('无需更新 component_token')

    ticket = global_store.get_component_verify_ticket()
    if not ticket:
        logger.warning('component_verify_ticket不存在')
        return HttpResponse('component_verify_ticket不存在')

    data = {
        'component_appid': config.AppID,
        'component_appsecret': config.AppSecret,
        'component_verify_ticket': ticket
    }
    resp = requests.post(api, data=json.dumps(data), timeout=2)
    resp_data = json.loads(resp.text)

    # An answer without the token used to surface as a bare KeyError with
    # no trace of what the API said; log the raw answer instead.
    if 'component_access_token' not in resp_data:
        logger.warning('获取component_access_token失败|%s', resp.text)
        return HttpResponse('获取component_access_token失败', status=500)

    token = resp_data['component_access_token']
    ex = int(resp_data['expires_in'])
    global_store.set_component_access_token(token, ex)

    logger.info('更新component_access_token|%s|%d', token, ex)

    return HttpResponse('success')
Пример #18
0
 def alter_book_state(self, row, slot, num):
     """
     Toggle the state stored at ``self.shelf[row][slot]``.

     ``num`` is the state the slot is expected to hold right now. If it
     does, the slot becomes 1 when ``num`` is 0 and 0 otherwise.

     Raises Exception when the slot does not currently hold ``num``.
     """
     if self.shelf[row][slot] != num:
         logger.warning('ERROR')
         raise Exception("you cannot do this")

     logger.info('successfully altered state of row: {} slot: {}'.format(row, slot))
     flipped = 0 if num != 0 else 1
     self.shelf[row][slot] = flipped
Пример #19
0
 def load_word_freq_dict(path):
     """
     Load a word-segmentation dictionary.

     Each line holds a word, optionally followed by its frequency; the
     frequency defaults to 1. Blank lines and lines starting with '#' are
     skipped.

     :param path: dictionary file path; a falsy path yields an empty dict
     :return: dict mapping each word to its integer frequency (empty when
         the file does not exist)
     """
     freq_by_word = {}
     if not path:
         return freq_by_word
     if not os.path.exists(path):
         logger.warning('file not found.%s' % path)
         return freq_by_word

     with codecs.open(path, 'r', encoding='utf-8') as handle:
         for raw_line in handle:
             stripped = raw_line.strip()
             if stripped.startswith('#'):
                 continue
             fields = stripped.split()
             if not fields:
                 continue
             # Frequency is the second field; default to 1 when absent.
             freq_by_word[fields[0]] = int(fields[1]) if fields[1:] else 1
     return freq_by_word
Пример #20
0
def get_review_and_star(shop_id):
    """
    Fetch a shop's scores, average price and review count.

    @param shop_id: shop identifier; must be as long as 'H2noKWCDigM0H9c1'.
    @return: ``[shop_base_score, scores, avg_price, review_count]`` on code
        200, where ``scores`` is one string of ' <title><value>' pairs; None
        after a captcha prompt (code 406) or on any other response code.
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId=' + str(
        shop_id) + '&cityId=19&mainCategoryId=2821&_token=' + str(get_token(
        shop_url)) + '&uuid=38af1c67-4a50-3220-06f6-bf9f16e71c41.1611146098&platform=1&partner=150&optimusCode=10' \
                     '&originUrl=' + shop_url
    r = requests_util.get_requests(url, request_type='json')
    r_text = requests_util.replace_json_text(r.text, get_font_msg())
    r_json = json.loads(r_text)
    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        # The URL needs a placeholder, otherwise the record fails to format
        # and the URL is never shown.
        logger.warning('处理验证码,按任意键继续:%s', verify_page_url)
        input()
    elif r_json['code'] == 200:
        shop_base_score = r_json['fiveScore']
        score_title_list = r_json['shopScoreTitleList']
        avg_price = BeautifulSoup(r_json['avgPrice'], 'lxml').text
        review_count = BeautifulSoup(r_json['defaultReviewCount'], 'lxml').text
        score_list = [
            BeautifulSoup(each, 'lxml').text
            for each in r_json['shopRefinedScoreValueList']
        ]
        # Pair every score with its title; each pair is prefixed by a space.
        scores = ''.join(
            ' ' + score_title_list[i] + score
            for i, score in enumerate(score_list))
        return [shop_base_score, scores, avg_price, review_count]
    else:
        logger.warning('json响应码异常,尝试更改提pr,或者提issue')
Пример #21
0
def get_basic_hidden_info(shop_id):
    """
    Fetch a shop's basic hidden info (name, address, phone numbers).

    @param shop_id: shop identifier; must be as long as 'H2noKWCDigM0H9c1'.
    @return: ``[shop_name, shop_address, shop_number]`` on code 200; None
        after a captcha prompt (code 406) or on any other response code.
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?' \
          'shopId=' + str(shop_id) + '&_token=' + str(get_token(
        shop_url)) + '&tcv=ck9rmnrofg&uuid=6ca1f51a-7653-b987-3cd6-95f3aadb13b8.1619854599&platform=1' \
                     '&partner=150&optimusCode=10&originUrl=' + str(shop_url)
    r = requests_util.get_requests(url, request_type='json')
    r_text = requests_util.replace_json_text(r.text, get_font_msg())
    r_json = json.loads(r_text)

    def _text(fragment):
        # Strip markup from an HTML fragment; a missing field becomes ''.
        return '' if fragment is None else BeautifulSoup(fragment, 'lxml').text

    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        # The URL needs a placeholder, otherwise the record fails to format
        # and the URL is never shown.
        logger.warning('处理验证码,按任意键继续:%s', verify_page_url)
        input()
    elif r_json['code'] == 200:
        msg = r_json['msg']['shopInfo']
        shop_name = msg['shopName']
        shop_address = _text(msg['address']) + _text(msg['crossRoad'])
        shop_number = _text(msg['phoneNo']) + _text(msg['phoneNo2'])
        return [shop_name, shop_address, shop_number]
    else:
        logger.warning('json响应码异常,尝试更改提pr,或者提issue')
Пример #22
0
    def plot_all(
        self,
        num_chains: int = 0,
        therm_frac: float = 0.,
        title: str = None,
        outdir: str = None,
        subplots_kwargs: dict[str, Any] = None,
        plot_kwargs: dict[str, Any] = None,
    ):
        """
        Plot every data variable of ``self.get_dataset()``.

        Each variable is drawn through ``self.plot`` with a colour cycling
        through 'C0'..'C8'. When ``self.plot`` returns sub-figures, the
        variable is additionally drawn into the first one with a colorbar
        labelled by the variable name. A variable that fails to plot is
        logged and skipped.

        The caller's ``plot_kwargs`` dict is not modified.

        Returns the dataset.
        """
        # Copy, because a per-variable 'color' entry is written below.
        plot_kwargs = {} if plot_kwargs is None else dict(plot_kwargs)
        subplots_kwargs = {} if subplots_kwargs is None else subplots_kwargs

        dataset = self.get_dataset()
        for idx, (key, val) in enumerate(dataset.data_vars.items()):
            plot_kwargs['color'] = f'C{idx%9}'
            try:
                _, subfigs, _ = self.plot(
                    val=val.values.T,
                    key=str(key),
                    title=title,
                    outdir=outdir,
                    therm_frac=therm_frac,
                    num_chains=num_chains,
                    plot_kwargs=plot_kwargs,
                    subplots_kwargs=subplots_kwargs,
                )
            except Exception:
                logger.warning(f'Unable to plot {key}, skipping')
                # Without this, `subfigs` below would be unbound (first
                # variable) or left over from the previous variable.
                continue

            if subfigs is None:
                continue

            # Temporarily hide the axes edge by painting it in the face
            # colour; always restore the previous setting afterwards.
            edgecolor = plt.rcParams['axes.edgecolor']
            plt.rcParams['axes.edgecolor'] = plt.rcParams['axes.facecolor']
            try:
                ax = subfigs[0].subplots(1, 1)
                cbar_kwargs = {}
                im = val.plot(ax=ax, cbar_kwargs=cbar_kwargs)
                im.colorbar.set_label(f'{key}', fontsize='large')
                sns.despine(subfigs[0],
                            top=True,
                            right=True,
                            left=True,
                            bottom=True)
            finally:
                plt.rcParams['axes.edgecolor'] = edgecolor

        return dataset
Пример #23
0
 def delete_book_from_shelf(self, row, slot):
     """
     Remove the book at (row, slot) by flipping the slot from state 1.

     Returns True on success; any exception from ``alter_book_state`` is
     logged and reported as False.
     """
     logger.info('attempting to remove book from row: {} slot: {}'.format(row, slot))
     try:
         self.alter_book_state(row, slot, 1)
     except Exception as e:
         logger.warning('FAILURE REMOVING BOOK: message: {} row: {} slot: {}'.format(e, row, slot))
         return False
     else:
         return True
Пример #24
0
 def put_book_on_shelf(self, row, slot):
     """
     Place a book at (row, slot) by flipping the slot from state 0.

     Returns True on success; any exception from ``alter_book_state`` is
     logged and reported as False.
     """
     logger.info('attempting to add book to row: {} slot: {}'.format(row, slot))
     try:
         self.alter_book_state(row, slot, 0)
     except Exception as e:
         logger.warning('FAILURE ADDING BOOK: message: {} row: {} slot: {}'.format(e, row, slot))
         return False
     else:
         return True
Пример #25
0
def get_review_and_star(shop_id):
    """
    Fetch a shop's scores, average price and review count.

    The ``reviewAndStar`` AJAX endpoint is requested through
    ``requests_util`` and retried, without limit, until it answers with
    code 200.

    @param shop_id: shop identifier; must be as long as 'H2noKWCDigM0H9c1'.
    @return: dict with the keys '店铺id', '店铺总分', '店铺评分' (a dict of
        score title -> score), '人均价格' and '评论总数'.
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?' \
          'shopId=' + str(shop_id) + \
          '&cityId=19' \
          '&mainCategoryId=2821' \
          '&_token=' + str(get_token(shop_url)) + \
          '&uuid=' + str(spider_config.UUID) + \
          '&platform=1' \
          '&partner=150' \
          '&optimusCode=10' \
          '&originUrl=' + shop_url

    # Work around requests that intermittently fail: keep retrying until a
    # code-200 answer arrives.
    while True:
        r = requests_util.get_requests(url, request_type='proxy, no cookie')
        r_text = requests_util.replace_json_text(r.text, get_font_msg())
        try:
            r_json = json.loads(r_text)
            # Anything other than 200 (e.g. a captcha response) is retried.
            if r_json['code'] == 200:
                break
        except (ValueError, KeyError, TypeError):
            # Body was not JSON, or not the expected object shape. Only
            # these are swallowed, so Ctrl-C can still stop the loop.
            continue

    # The loop above only exits on code 200, so no other response code can
    # reach this point.
    shop_base_score = r_json['fiveScore']
    score_title_list = r_json['shopScoreTitleList']
    avg_price = BeautifulSoup(r_json['avgPrice'], 'lxml').text
    review_count = BeautifulSoup(r_json['defaultReviewCount'], 'lxml').text
    score_list = [
        BeautifulSoup(each, 'lxml').text
        for each in r_json['shopRefinedScoreValueList']
    ]
    # Map every score title to the score at the same position.
    scores = {
        score_title_list[i]: score
        for i, score in enumerate(score_list)
    }
    return {
        '店铺id': shop_id,
        '店铺总分': shop_base_score,
        '店铺评分': scores,
        '人均价格': avg_price,
        '评论总数': review_count
    }
Пример #26
0
    def call_action(self, action_name, args=(), stop=False):
        """
        Forward an action to this entity's action manager.

        Calls the manager's ``stop`` when ``stop`` is true, otherwise its
        ``do``, passing ``action_name`` followed by the unpacked ``args``.
        Logs a warning when the entity has no action manager.
        """
        manager = self.action_manager
        if manager is None:
            logger.warning(f'entity of type {type(self).__name__} has no action manager')
            return

        handler = manager.stop if stop else manager.do
        handler(action_name, *args)
    def load_motd(self):
        """
        Read the message of the day for this server.

        The text is read from the file '<server name>.motd'. Returns its
        content, or '' (after logging a warning) when the file is missing.
        """
        motd_path = self.server.name + ".motd"
        if not path.exists(motd_path):
            logger.warning("No motd file for " + self.server.name)
            return ""

        # The context manager closes the file even if read() raises.
        with open(motd_path) as motd_f:
            return motd_f.read()
Пример #28
0
def setup_proxy_for_driver(driver: webdriver, test_url=None, times=0):
    """
    Restart the driver session behind a fresh proxy, retrying recursively.

    The current session is closed, a proxy is obtained from ``get_proxy()``,
    a new session is started with capabilities built for that proxy, and
    the proxy is checked with ``test_proxy``. A failed check and the
    recoverable errors handled below trigger another attempt with
    ``times + 1``.

    :param driver: driver whose session is replaced
    :param test_url: passed through to ``test_proxy``
    :param times: number of attempts already made (used by the recursion)
    :return: the driver, once a proxy passed the check
    :raises TooManyTimesException: when ``times`` exceeds 9
    """
    # Attempt budget exhausted: give up.
    if times > 9:
        logger.warning('setup_proxy_for_driver no available proxy')
        raise TooManyTimesException('setup_proxy_for_driver')
    try:
        try:
            # clean cookie and close session
            driver.delete_all_cookies()
            driver.quit()
        except:
            # Best effort: any failure while closing the old session is ignored.
            pass
        proxy_url = get_proxy().get('proxy')

        logger.info('proxy get {}'.format(proxy_url))

        capabilities = get_capabilities(proxy_url)

        logger.info('start new session')
        driver.start_session(capabilities=capabilities)
        logger.info('start testing proxy')

        ok = test_proxy(driver, test_url, proxy_url)
        if not ok:
            logger.warning('proxy checking failed for {} times'.format(times +
                                                                       1))
            # NOTE: this recursive call runs inside the outer try, so an
            # exception it raises is also seen by the handlers below.
            return setup_proxy_for_driver(driver, test_url, times=times + 1)
        logger.info('proxy works')

        return driver

    except SessionNotCreatedException:
        # The session could not be started: retry with a driver obtained
        # from connect_to_driver() instead of the current one.
        logger.error('Failed to start a new session')
        return setup_proxy_for_driver(connect_to_driver(),
                                      test_url,
                                      times=times + 1)

    except InvalidSessionIdException as e2:
        logger.error('Session id invalid {}'.format(e2))
        return setup_proxy_for_driver(driver, test_url, times=times + 1)

    except WebDriverException as e3:
        logger.error('No active session with ID')
        return setup_proxy_for_driver(driver, test_url, times=times + 1)

    except NoProxyAvailableException:
        # No proxy could be obtained: report it, then retry.
        logger.error('No proxy')
        mongo._report_error({
            'error_source': 'proxy',
            'message': 'no_proxy_available',
            'url': None,
            'payload': {}
        })
        return setup_proxy_for_driver(driver, test_url, times=times + 1)

    except Exception as e:
        # Anything else (including TooManyTimesException raised by a nested
        # attempt) is logged and propagated to the caller.
        logger.error(f'setup_proxy_for_driver {e}')
        raise e
Пример #29
0
    def get(self, request, id):
        """
        Return the snippet with the given id.

        Responds with ObjectNotExistResponse when no such snippet exists,
        otherwise with SuccessResponse wrapping ``snippet.to_dict()``.
        """
        try:
            snippet = Snippet.objects.get(id=id)
        except Snippet.DoesNotExist:
            logger.warning('snippet not exist|%d', id)
            return ObjectNotExistResponse()
        else:
            logger.debug('query snippet|%d', id)
            payload = snippet.to_dict()
            return SuccessResponse(payload)
Пример #30
0
def get_ip():
    """
    Worker loop: turn queued raw targets into domain and IP results.

    For every item taken from ``raw_target_queue`` the main domain and the
    sub-domains are collected (prefixed with 'http://' when they carry no
    scheme), sub-domain IPs are taken from the item, and domains without
    known IPs are resolved via DNS. Unique IPs go to ``ips_result``,
    domains to ``domains_result`` and a summary dict to ``targets_result``.

    If resolving any domain fails with ``socket.gaierror`` the whole item
    is dropped silently; other errors are logged. The progress bar and the
    queue's task counter are updated for every item either way. The loop
    never returns.
    """
    while True:
        # Fetch outside the try block so task_done() is only called for an
        # item that was actually taken from the queue.
        raw_target_item = raw_target_queue.get()
        # Reset per item so the error log below never reads an unbound name
        # or a host left over from a previous item.
        netloc = ''
        try:
            domain_ip_map = {}
            domain_list = []
            ip_list = []

            # extract domains
            main_domain = raw_target_item['domain']
            if not main_domain.startswith(('http://', 'https://')):
                main_domain = 'http://' + main_domain
            domain_list.append(main_domain)

            for sub, sub_ips in raw_target_item['subdomains'].items():
                subdomain = 'http://' + sub
                if subdomain not in domain_list:
                    domain_list.append(subdomain)
                domain_ip_map[subdomain] = []
                for ip in sub_ips:
                    if ip not in ip_list:
                        ip_list.append(ip)
                    if ip not in domain_ip_map[subdomain]:
                        domain_ip_map[subdomain].append(ip)

            # extract ips: resolve every domain that has no IP list yet
            for domain in domain_list:
                if domain in domain_ip_map:
                    continue
                domain_ip_map[domain] = []
                netloc = parse.urlparse(domain)[1]
                ip = socket.gethostbyname(netloc)
                if ip:
                    if ip not in ip_list:
                        ip_list.append(ip)
                    domain_ip_map[domain].append(ip)

            for ip in ip_list:
                ips_result.put(ip)
            for domain in domain_list:
                domains_result.put(domain)
            targets_result.put({
                'name': raw_target_item['name'],
                'url': raw_target_item['url'],
                'domain2ip': domain_ip_map,
            })

        except socket.gaierror:
            # Name resolution failed: skip this target.
            pass
        except Exception:
            logger.warning("Error when get ip:%s", netloc)
        finally:
            pbar.update(1)
            raw_target_queue.task_done()