Example #1
0
class ZhiDaoGenerator:
    """Baidu Zhidao keyword crawler.

    Keywords come from two sources:
      1. keywords contained in the question body,
      2. the topic tags the question belongs to.
    """

    def __init__(self):
        self.logger = LogHandler('zhidao_crawl')
        # Zhidao request headers: a BAIDUID cookie must be present,
        # otherwise the listing page returns no data.  Any failure to
        # obtain the cookie propagates to the caller.
        self.zhidao_headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 16 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Cookie':
            'BAIDUID=%s' % requests.get('https://zhidao.baidu.com/browse/',
                                        timeout=3).cookies['BAIDUID']
        }

    def __zd_question_keywords(self, url):
        """Return the keyword strings extracted from one question page.

        Network errors are logged and an empty list is returned so the
        caller can keep crawling; any other error propagates.
        """
        try:
            resp = requests.get(url, timeout=3)
            resp.encoding = resp.apparent_encoding
        except RequestException as exc:  # don't shadow the `re` module
            self.logger.warning(exc)
            return []
        kws = []
        # Keywords are rendered as <li class="word grid ..."> items,
        # each wrapping a <span class="word-text"> label.
        for kw_tag in BeautifulSoup(resp.text, "html.parser").find_all(
                'li',
                class_=lambda class_: class_ and ('word grid' in class_)):
            kw = kw_tag.find(class_="word-text")
            if kw is not None:
                kws.append(kw.string)
        return kws

    @timethis
    def crawl_zhidao_words(self, save_keyword):
        """Crawl the Zhidao question list and feed keywords to *save_keyword*.

        Both the topic tags of each listed question and the keywords found
        in the question body are passed (as str) to the callback.  Network
        errors are logged and swallowed; anything else propagates.
        """
        try:
            resp = requests.get(
                url=
                'https://zhidao.baidu.com/list?_pjax=%23j-question-list-pjax-container',
                headers=self.zhidao_headers,
                timeout=3)
            resp.encoding = resp.apparent_encoding
        except RequestException as exc:
            # `warn` is a deprecated alias of `warning` in the logging API.
            self.logger.warning(exc)
            return
        for qs in BeautifulSoup(resp.text, "html.parser").find_all(
                'div', class_='question-title-section'):
            # Topic tags the question belongs to.
            for qt in map(lambda x: x.string.replace('\n', ''),
                          qs.find_all('a', class_='tag-item')):
                save_keyword(qt)
            # Keywords contained in the question body.
            for qm in self.__zd_question_keywords(qs.a.get('href')):
                save_keyword(str(qm))
Example #2
0
class ZhihuTopicGenerator:
    """Zhihu topic crawler in two phases: discover topic ids from hot
    search results, then expand each id along its parent/child relations
    into a DAG stored in redis."""

    def __init__(self):
        # Rotating proxy supplier.
        self.p_receiver = ProxiesReceiver()
        # One session for all requests, with automatic retries on
        # connection failures and a small back-off between attempts.
        self.session = requests.Session()
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('https://', adapter)
        self.logger = LogHandler('topics_generator')
        # Silence urllib3's per-retry noise.
        logging.getLogger("urllib3").setLevel(logging.ERROR)

    def __get_topic_message(self, tid):
        """Fetch a topic's metadata and store it in redis.

        The data lands in the ``zhTopicMessage`` hash: key is the topic id,
        value is ``str()`` of a dict with name / introduction /
        questions_count / best_answers_count / followers_count /
        best_answerers_count.

        Returns True when the id was newly stored (and queued in the
        ``zhNewTopicID`` set for further processing), False otherwise.
        """
        try:
            j_rst = self.session.get(url=topic_message_url % tid,
                                     headers=headers,
                                     proxies=self.p_receiver.one_random,
                                     timeout=3).json()
        except RequestException as exc:
            # `warning`, not the deprecated `warn`.
            self.logger.warning(exc)
            return False
        message = {
            "name": j_rst.get("name"),
            'introduction': j_rst.get("introduction"),
            "questions_count": j_rst.get("questions_count"),
            "best_answers_count": j_rst.get("best_answers_count"),
            'followers_count': j_rst.get("followers_count"),
            "best_answerers_count": j_rst.get("best_answerers_count")
        }
        # hset returns 1 only when the field is new, so topics already
        # seen are not re-queued.
        if redis_cli.hset('zhTopicMessage', tid, str(message)):
            redis_cli.sadd('zhNewTopicID', tid)
            self.logger.info("zhNewTopicID:%d", tid)
            return True
        return False

    def __get_hot_topics(self):
        """Pop one keyword from ``zhTemporaryWords`` and yield the ids of
        topics found by searching Zhihu for it (paged 10 at a time, at
        most 1000 results)."""
        tw = redis_cli.block_pop('zhTemporaryWords').decode('utf-8')  # pop
        for offset in range(0, 1000, 10):
            url = zh_search_url % (tw, offset)
            try:
                j_topics = self.session.get(url=url,
                                            headers=headers,
                                            proxies=self.p_receiver.one_random,
                                            timeout=3).json()
                topics = j_topics.get('data', None) if j_topics else None
                if not topics:  # last page reached
                    return
                # Pull detailed info for every topic on this page.
                for t in topics:
                    obj = t.get('object')
                    if not (obj and obj.get('id')):
                        break
                    try:
                        tid = int(obj['id'])
                    except ValueError as ve:
                        # Lazy %-formatting; the original call passed the
                        # bad id as a stray positional with no placeholder.
                        self.logger.warning("%s: %r", ve, obj['id'])
                        continue
                    if self.__get_topic_message(tid):
                        yield tid
            # ReadTimeout subclasses RequestException, so one handler
            # covers both (a separate ReadTimeout clause after this one
            # would be unreachable).
            except (RequestException, KeyError) as exc:
                self.logger.warning((exc, url))

    @staticmethod
    def __save_to_dag(child_topic_id, parent_topic_id):
        """Record the parent -> child edge in the ``zhTopicDAG`` hash.

        Each hash value is ``str()`` of the set of child ids for that
        parent.
        """
        import ast  # local import: only this method needs it

        ids = redis_cli.hget('zhTopicDAG', parent_topic_id)
        if not ids or ids.decode() == "None":
            redis_cli.hset('zhTopicDAG', parent_topic_id,
                           str({child_topic_id}))
        else:
            # literal_eval instead of eval: the stored value is our own
            # set repr, but there is no reason to allow arbitrary code
            # execution if the redis content is ever tampered with.
            new_ids = ast.literal_eval(ids.decode())
            new_ids.add(child_topic_id)
            redis_cli.hset('zhTopicDAG', parent_topic_id, str(new_ids))

    def __add_topics(self, url, topic_id, func):
        """Fetch topics related to *topic_id* from *url* (parent or child
        endpoint), apply ``func(topic_id, related_id)`` to each result and
        fetch the related topic's metadata."""
        try:
            req = self.session.get(url=url % int(topic_id),
                                   headers=headers,
                                   proxies=self.p_receiver.one_random,
                                   timeout=3)
            if not req:  # the topic may have no parents/children
                return
            for p in req.json()['data']:
                expand_topic_id = int(p['id'])
                func(topic_id, expand_topic_id)
                self.__get_topic_message(expand_topic_id)
        except RequestException as exc:
            # Also covers ReadTimeout, which subclasses RequestException.
            self.logger.warning(exc)

    def __expand_topics(self, tid):
        """Expand *tid* in both directions of the topic graph."""
        # Upwards: tid is the child of each fetched topic.
        self.__add_topics(parent_url, tid,
                          lambda a, b: self.__save_to_dag(a, b))
        # Downwards: tid is the parent of each fetched topic.
        self.__add_topics(child_url, tid,
                          lambda a, b: self.__save_to_dag(b, a))

    @timethis
    def process(self):
        """Main loop: discover topic ids, then expand each one."""
        for tid in self.__get_hot_topics():
            self.__expand_topics(tid)