示例#1
0
def getValidUrls():
    """Collect URLs from ``urlCollector.collectUrls`` for successive indices.

    Indices 0, 1, 2, ... are queried in order and every returned URL is
    accumulated in a set. Collection stops at the first index whose result
    adds no URL that was not already seen, or once the hard cap of 10**6
    indices has been reached.

    Returns:
        set: every distinct URL collected. A set is always returned, even
        when the index cap is reached before the results stop growing.
    """
    allUrls = set()
    # 10**6 is only an upper bound on the number of indices queried.
    for index in xrange(10**6):
        lastSize = len(allUrls)
        allUrls.update(urlCollector.collectUrls(index))
        if len(allUrls) == lastSize:
            # Nothing new at this index: treat the collection as complete.
            break
    # Single exit point: previously, exhausting the loop fell off the end of
    # the function and implicitly returned None instead of the collected set.
    return allUrls
示例#2
0
def main():
    """Discover URLs index by index and hand every new one to the downloader.

    Acquires the "locks/backend.pid" file lock via ``disk.getFileLockOrDie``
    (presumably so only one backend instance runs at a time -- verify against
    the ``disk`` module), then queries ``urlCollector.collectUrls`` for
    successive indices. Each URL that ``storage.isDiscovered`` does not
    already know is logged, stored and passed to ``tobe.toDownload``.

    The scan ends at the first index that yields no new URL (or after 10**6
    indices). If no URL at all was returned during the scan, an error is
    logged.
    """
    disk.getFileLockOrDie("locks/backend.pid")
    totalSeen = 0
    for pageIndex in xrange(10**6):
        pageUrls = urlCollector.collectUrls(pageIndex)
        totalSeen += len(pageUrls)

        freshCount = 0
        for candidate in pageUrls:
            if storage.isDiscovered(candidate):
                continue
            freshCount += 1
            logging.info("Discovered new url: %s", candidate)
            storage.storeUrl(candidate)
            tobe.toDownload(candidate)

        if freshCount == 0:
            # This index contributed nothing new, so stop scanning.
            break

    if not totalSeen:
        logging.error("No valid URL discovered")