def main():
    """Collect URLs index by index and register the ones not yet discovered.

    Calls ``disk.getFileLockOrDie`` on ``locks/backend.pid`` first
    (presumably a single-instance guard -- confirm against ``disk``).
    Then, for each index from 0 up to 10**6 - 1, asks ``urlCollector`` for
    the URLs at that index. Every URL that ``storage.isDiscovered`` does not
    report as discovered is logged, passed to ``storage.storeUrl`` and then
    to ``tobe.toDownload``. The scan stops at the first index that yields no
    new URL. If no URL at all was collected, an error is logged.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    total_seen = 0
    for page in xrange(10**6):
        candidates = urlCollector.collectUrls(page)
        total_seen += len(candidates)

        found_fresh = False
        for candidate in candidates:
            if storage.isDiscovered(candidate):
                # Already discovered: nothing to record for this URL.
                continue
            found_fresh = True
            logging.info("Discovered new url: %s", candidate)
            storage.storeUrl(candidate)
            tobe.toDownload(candidate)

        # An index that contributed nothing new ends the scan.
        if not found_fresh:
            break

    if not total_seen:
        logging.error("No valid URL discovered")
def discoverUnknownUrls(urls):
    """Register every URL in *urls* without checking whether it is known.

    Logs how many URLs were given, then passes each one to
    ``storage.storeUrl`` followed by ``tobe.toDownload``.

    Args:
        urls: A sized iterable of URLs (``len()`` is called on it).
    """
    count = len(urls)
    logging.info("Discovering %s new urls", count)
    for entry in urls:
        storage.storeUrl(entry)
        tobe.toDownload(entry)