def initWebsite_delay_dict(self, record):
    """
    @summary: Initialize the pending-update delay for a website.
    :param record: website record (id, url, xpath, delay_time)
    :return:
    """
    if not Cache.keyExist(cache.websiteDelay_dict, record[0]):
        Cache.setDict(cache.websiteDelay_dict, record[0], record[-1])
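# Usage sketch (illustrative, not part of the original module): the record is
# assumed to arrive in the column order the docstring describes, i.e.
# (id, url, xpath, delay_time); the values below are hypothetical.
#
#   self.initWebsite_delay_dict((42, "http://example.com/news", "//a/@href", 0))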
def run(self): while not global_EXIT: url = "" try: website_id, url = Cache.getQueue(cache.freshContentUrl_queue, False) res = filterContentInfoFunc(website_id, url) if res == SpiderResType.success or res == SpiderResType.alreadyExist: Cache.appendList(cache.oldContent_list, url) else: Cache.setDict(cache.unrecognized_contentUrl_dict, url, website_id) except Exception as e: if type(e) is not queue.Empty: log.logMsg(LogType.error, "[FilterContentInfoThread] %s %s" % (url, traceback.format_exc()))
def run(self): while not global_EXIT: website_url = "" try: website_id, website_url, xpath = Cache.getQueue(cache.websiteUrl_queue, False) if not filterContentUrlFunc(website_id, website_url, xpath): Cache.setDict(cache.unrecognized_websiteUrl_dict, website_id, (website_url, xpath)) except Exception as e: if type(e) is not queue.Empty: log.logMsg(LogType.error, "[FilterContentUrlThread.freshHandler] %s %s"%(website_url, traceback.format_exc())) else: for i in range(10): if global_EXIT: break time.sleep(1)
def resetDelay_time():
    """
    @summary: Reset the crawl delay of every website to zero.
    """
    db = None
    try:
        db = mysql.Mysql()
        for website_id in Cache.keys(cache.websiteDelay_dict):
            # Keep the cached value an int, consistent with initWebsite_delay_dict
            # and incrDelay_time, and persist the reset to MySQL.
            Cache.setDict(cache.websiteDelay_dict, website_id, 0)
            db.saveDelay_time(website_id, 0)
    except Exception as e:
        log.logMsg(LogType.error, "[resetDelay_time] %s" % (repr(e)))
    finally:
        if db:
            db.dispose()
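# show_delay_time() and content_count() are referenced by command() below but are
# defined elsewhere in the project. A minimal sketch of show_delay_time, assuming
# the cache simply maps website_id -> delay in seconds, could look like:
#
#   def show_delay_time():
#       for website_id in Cache.keys(cache.websiteDelay_dict):
#           print("%s: %ss" % (website_id, Cache.getDict(cache.websiteDelay_dict, website_id)))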
def main():
    thread_count = 3
    pre_threads = []
    initdb()                                  # initialize the Redis database
    initGlobalArgs()
    initContentUrl_dict()                     # initialize the deduplication table

    log_thread = log.LogThread()              # start the logging thread
    log_thread.start()

    QueryWebsiteUrl_thread = QueryWebsiteUrlThread()      # thread that reads website addresses
    QueryWebsiteUrl_thread.start()
    pre_threads.append(QueryWebsiteUrl_thread)

    filterContentUrl_thread = FilterContentUrlThread()    # thread that collects content URLs
    filterContentUrl_thread.start()
    pre_threads.append(filterContentUrl_thread)

    for i in range(thread_count):
        thread = FilterContentInfoThread()
        thread.start()
        pre_threads.append(thread)

    unrecognizedWebsiteUrl_thread = UnrecognizedWebsiteUrl_Thread()
    unrecognizedWebsiteUrl_thread.start()
    pre_threads.append(unrecognizedWebsiteUrl_thread)

    unrecognizedContentUrl_thread = UnrecognizedContentUrl_Thread()
    unrecognizedContentUrl_thread.start()
    pre_threads.append(unrecognizedContentUrl_thread)

    while not global_EXIT:                    # wait until the console requests a stop
        time.sleep(1)
    time.sleep(5)

    saveWebsiteDelaytime()                    # persist each website's delay time
    for t in pre_threads:
        t.join()
    log.logMsg(LogType.success, "--------------------bye---------------------\n")

    while not Cache.qempty(cache.log_queue):  # wait until all log entries are written to file
        pass
    Cache.setDict(cache.globalArgs_dict, "LogThread_EXIT", True)
    log_thread.join()
    if db:
        db.dispose()
def initGlobalArgs():
    """
    @summary: Initialize the global flags.
    """
    Cache.setDict(cache.globalArgs_dict, "LogThread_EXIT", False)
    Cache.setDict(cache.globalArgs_dict, "global_EXIT", False)
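# Shutdown order (see main() and the __main__ loop below): the console "stop"
# command sets global_EXIT, the worker threads drain out and are joined, the
# per-site delays are saved, and only after cache.log_queue is empty is
# "LogThread_EXIT" set so the logging thread can exit cleanly.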
log.logMsg(LogType.error, "[saveWebsiteDelaytime] %s" % (repr(e))) finally: if db: db.dispose() def command(cmd): cmd = cmd.lower() if cmd == "delay-time" or cmd == "dt": show_delay_time() elif cmd == "content-count" or cmd == "cc": content_count() elif cmd == "reset-delay-time" or cmd == "rdt": resetDelay_time() if __name__ == '__main__': print("* Started WeNeW_Spider-programe...") thread = threading.Thread(target=main) thread.setDaemon(True) thread.start() while True: cmd = input(">>") if cmd.lower() == "stop": global_EXIT = True Cache.setDict(cache.globalArgs_dict, "global_EXIT", True) print("* Waiting for the WeNeW_Spider-programe to end...") thread.join() print("* WeNeW_Spider-programe closed successfully!") break else: command(cmd)
def incrDelay_time(website_id, timeout):
    """
    @summary: Increase the website's crawl delay by timeout.
    """
    record = Cache.getDict(cache.websiteDelay_dict, website_id)
    Cache.setDict(cache.websiteDelay_dict, website_id, int(record) + timeout)
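# Usage sketch (illustrative, not part of the original module): a fetch routine
# could back off a misbehaving site by increasing its cached delay. The fetchPage
# name, the requests call and the 300-second step are assumptions; the delay unit
# is assumed to be seconds.
#
#   def fetchPage(website_id, url):
#       try:
#           return requests.get(url, timeout=10)
#       except requests.Timeout:
#           incrDelay_time(website_id, 300)   # postpone the next visit to this site
#           return None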