Example #1
# NOTE: RedisClient and Crawler are project-local classes from the surrounding proxy-pool project
class Getter:
    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def limit(self, limit_num=500):
        """
        Check whether the number of stored proxies has reached the pool's limit.
        :param limit_num: maximum number of proxies to keep in the pool
        :return: True if the pool is full, otherwise False
        """
        return self.redis.count() >= limit_num

    def run(self):
        print("Getter is running...")
        if not self.limit():
            # iterate over every crawl function registered on the Crawler
            for callback_label in range(self.crawler.__CrawlFuncCount__):
                callback = self.crawler.__CrawlFunc__[callback_label]
                proxies = self.crawler.get_proxies(callback)
                for proxy in proxies:
                    self.redis.put(proxy)
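
A minimal driver sketch for the Getter above. The loop, the __main__ guard, and the 60-second interval are illustrative assumptions and not part of the original example:

import time

if __name__ == "__main__":
    getter = Getter()
    while True:
        # crawls new proxies only while the pool is below its limit
        getter.run()
        time.sleep(60)  # assumed refresh interval in seconds
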
Example #2
# NOTE: logger, config, RedisClient, HtmlRequest and HtmlParser are project-local modules
import time

import gevent
import requests
from lxml import etree


class ProxyPool:
    def __init__(self):
        self.logger = logger
        self.db = RedisClient(config.NAME, config.HOST, config.PORT,
                              config.PASSWORD)
        self.html_request = HtmlRequest()
        self.html_parser = HtmlParser()

    def update(self):
        """
        Update the proxy pool: crawl new proxies whenever the pool drops below the configured minimum.
        :return:
        """
        while True:
            if self.db.nums() < config.PROXY_MINNUM:
                self.logger.info(
                    "db has %d proxies, less than the minimum, start crawling..."
                    % self.db.nums())
                spawns = []
                # append the greenlet so that joinall actually waits for it
                spawns.append(gevent.spawn(self.crawl_gatherproxy))
                # for parser in config.parserList:
                #     spawns.append(gevent.spawn(self.crawl, parser))
                #     if len(spawns) >= config.MAX_DOWNLOAD_CONCURRENT:
                #         gevent.joinall(spawns)
                #         spawns = []
                gevent.joinall(spawns)
            else:
                self.logger.info(
                    "db has %d proxies, enough to use, waiting for next update..."
                    % self.db.nums())
            time.sleep(config.UPDATE_TIME)

    def crawl(self, parser):
        for url in parser['urls']:
            response = self.html_request.get(url)
            if response:
                proxy_list = self.html_parser.parse(response.text, parser)
                if proxy_list:
                    self.logger.info("got %d proxies from %s", len(proxy_list),
                                     url)
                    for proxy in proxy_list:
                        if self.valid(proxy):
                            # save the working proxy
                            self.logger.info("got a valid proxy: %s", proxy)
                            self.db.put(proxy)

    def crawl_gatherproxy(self):
        headers = {
            'Host': 'www.gatherproxy.com',
            'Proxy-Connection': 'keep-alive',
            'Origin': 'http://www.gatherproxy.com',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://www.gatherproxy.com/proxylist/country/?c=China',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }
        url = 'http://www.gatherproxy.com/proxylist/country/?c=China'
        data = {"Country": "china", "PageIdx": 1, "Filter": '', "Uptime": 0}
        for page in range(1, 40):
            data['PageIdx'] = page
            response = self.html_request.post(url, data, headers)
            proxy_list = []
            root = etree.HTML(response.text)
            proxys = root.xpath(".//table[@id='tblproxy']/tr[position()>2]")
            for proxy in proxys:
                try:
                    # both the IP and the port are embedded in quoted strings
                    # inside <script> tags; the port is hex-encoded, e.g. '1F90' -> 8080
                    ip_text = proxy.xpath(".//td[2]/script")[0].text
                    ip = ip_text.split("'")[1]
                    port_text = proxy.xpath(".//td[3]/script")[0].text
                    port = str(int(port_text.split("'")[1], 16))
                except Exception as e:
                    self.logger.error("parse proxy error: %s", e)
                    continue
                proxy = ":".join([ip, port])
                proxy_list.append(proxy)
            if proxy_list:
                self.logger.info("got %d proxies from %s", len(proxy_list), url)
                for proxy in proxy_list:
                    if self.valid(proxy):
                        # save the working proxy
                        self.logger.info("got a valid proxy: %s", proxy)
                        self.db.changeTable("gatherproxy")
                        self.db.put(proxy)

    def valid(self, proxy):
        proxies = {"http": "http://{proxy}".format(proxy=proxy)}
        try:
            # discard proxies that take too long to respond
            r = requests.get('http://httpbin.org/ip',
                             proxies=proxies,
                             timeout=10,
                             verify=False)
            if r.status_code == 200:
                # logger.info('%s is ok' % proxy)
                return True
            return False
        except Exception as e:
            # logger.error(str(e))
            return False
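
A possible entry point for ProxyPool, shown only as a sketch; the original snippet does not include one. Since update() itself loops forever, it can simply be called from __main__:

if __name__ == "__main__":
    pool = ProxyPool()
    # blocks forever: crawls when the pool is low, then sleeps config.UPDATE_TIME
    pool.update()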