Python MongoQueue.repairFast示例

编程语言: Python

命名空间/包名称: mongo_queue

类/类型: MongoQueue

方法/功能: repairFast

hotexamples.com的示例: 2

Python MongoQueue.repairFast - 共找到2个示例。以下是从开源项目中提取的、评价最高的mongo_queue.MongoQueue.repairFast真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

MongoQueue(18)

push(10)

clear(5)

peek(5)

repairFast(2)

complete(1)

pop(1)

pushAll(1)

turn_down(1)

示例#1

显示文件

def main(max_threads=5):
    """Run the listing-page crawl once per hour, forever.

    The URL list is derived from the callback's seed URL: its last two
    characters are dropped and ``"<page>/"`` is appended for pages
    1 through 1188.

    On each wake-up the local hour decides what happens:

    * hour in 3..12 (inclusive): nothing is crawled, a ``pass:<timestamp>``
      line is printed;
    * any other hour: ``queue.repairFast()`` is called and then
      ``process_crawler`` is run over the full URL list.

    Either way the loop then sleeps for 3600 seconds. This function never
    returns.

    :param max_threads: forwarded unchanged to ``process_crawler``.
    """
    scrape_callback = AlexaCallback()
    cache = MongoCache()
    queue = MongoQueue()

    # Strip the trailing two characters of the seed URL, then number the pages.
    base = scrape_callback.seed_url[:-2]
    urls = [base + str(page) + '/' for page in range(1, 1189)]

    browser_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'

    while True:
        now = datetime.now()
        if 3 <= now.hour <= 12:
            # Inside the quiet window: just report and wait for the next hour.
            # (Single-argument parenthesised print behaves the same under the
            # Python 2 print statement.)
            print('pass:' + str(now))
        else:
            queue.repairFast()
            # Re-read the seed URL on every pass, as the original call did.
            host = urlparse.urlparse(scrape_callback.seed_url).netloc
            process_crawler(
                urls,
                scrape_callback=scrape_callback,
                cache=cache,
                max_threads=max_threads,
                timeout=30,
                host=host,
                user_agent=browser_agent,
            )
        time.sleep(3600)

示例#2

显示文件

文件： process_test.py 项目： freedomofme/CrawlerNovels

def main(max_threads=5):
    """Build catalogue URLs from stored book links and crawl them hourly.

    Every document of the ``books`` collection in the local ``cache``
    database is read, and its ``link`` field is rewritten according to the
    host of the callback's seed URL:

    * ``www.junzige.la``: ``'/novel' + link[5:-4] + '/'`` is passed through
      ``normalize`` together with the seed URL;
    * ``www.boluoxs.com``: the part of the link between its last ``/`` and
      its last ``.`` is appended to ``http://www.boluoxs.com/biquge/0/``;
    * any other host: the link is used unchanged.

    The function then loops forever. Once per hour, when the local hour is
    below 3 or above 12, it calls ``queue.repairFast()``, runs
    ``process_crawler`` over the URL list and finally ``queue.clear()``;
    at other hours it only prints ``pass:<timestamp>``.

    :param max_threads: forwarded unchanged to ``process_crawler``.
    """
    catlog_callback = AlexaCallback()
    cache = MongoCache()
    queue = MongoQueue()

    # Open the local MongoDB instance and read the stored book records from
    # the `books` collection of the `cache` database.
    client = MongoClient('localhost', 27017, connect=False)
    db = client.cache

    # The seed host selects the rewrite rule and is the same for every record,
    # so it is computed once instead of once per document.
    seed_host = urlparse.urlparse(catlog_callback.seed_url).netloc

    urls = []
    # Iterate the cursor directly. Polling `cursor.alive` and calling `next()`
    # is unsafe: `alive` may still be True when no documents remain, and then
    # `next()` raises StopIteration (e.g. on an empty collection).
    for book in db.books.find():
        link = book['link']

        if seed_host == 'www.junzige.la':
            link = '/novel' + link[5:-4] + '/'
            link = normalize(catlog_callback.seed_url, link)
        elif seed_host == 'www.boluoxs.com':
            link = 'http://www.boluoxs.com/biquge/0/' + link[link.rfind('/') + 1:link.rfind('.')] + '/'

        # Single-argument parenthesised print behaves the same under the
        # Python 2 print statement.
        print(link)
        urls.append(link)

    # Show the first URL as a sanity check; skip it when nothing was loaded
    # rather than failing with IndexError.
    if urls:
        print(urls[0])

    while True:
        now = datetime.now()

        if now.hour < 3 or now.hour > 12:
            queue.repairFast()
            process_crawler(
                urls,
                scrape_callback=catlog_callback,
                cache=cache,
                max_threads=max_threads,
                timeout=30,
                host=urlparse.urlparse(catlog_callback.seed_url).netloc,
                user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
            )
            # Every time a crawl pass finishes, clear the job queue.
            queue.clear()
        else:
            print('pass:' + str(now))
        time.sleep(3600)