def main():
    """Reconcile the stored URL set with the currently valid URL set.

    Steps, in order:
      1. Call ``disk.getFileLockOrDie`` on ``locks/backend.pid``
         (presumably guards against concurrent runs -- confirm in ``disk``).
      2. Load the stored URLs and the valid URLs as frozensets.
      3. Log an error and exit with status 1 when there is no valid URL,
         so that step 4 never purges everything that is stored.
      4. Purge every stored URL that is not valid.
      5. Pass the valid-but-not-yet-stored URLs to ``discoverUnknownUrls``.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    known = frozenset(storage.getUrls())
    logging.info("Got %s stored urls", len(known))

    current = frozenset(getValidUrls())
    if not current:
        # An empty valid set would make every stored URL look invalid.
        logging.error("No valid URL found")
        sys.exit(1)
    logging.info("Found %s valid urls", len(current))

    # Stored but no longer valid: remove from storage.
    for stale in known - current:
        storage.purge(stale)

    # Valid but not stored yet: hand over for discovery.
    discoverUnknownUrls(current - known)
def main():
    """Download pending URLs and queue the successful ones for analysis.

    Calls ``disk.getFileLockOrDie`` on ``locks/backend.pid`` first
    (presumably guards against concurrent runs -- confirm in ``disk``).

    URL selection:
      * ``options.fix`` set: every stored URL for which
        ``storage.isDownloaded`` is false.
      * otherwise: whatever ``tobe.getToBeDownloaded()`` returns.

    For each URL, ``_try_download`` is called; a ``None`` result is treated
    as "nothing to store" and the URL is skipped. Otherwise the content is
    stored and the URL is passed to ``tobe.toAnalyse``. Afterwards
    ``tobe.nothingToBeDownloaded()`` is called once.

    The closing log line reports the number of URLs whose content was
    actually stored, not the number attempted.
    """
    disk.getFileLockOrDie("locks/backend.pid")
    options, args = parseArgs()

    if options.fix:
        urls = [url for url in storage.getUrls() if not storage.isDownloaded(url)]
    else:
        urls = tobe.getToBeDownloaded()
    logging.info("Downloading %s urls", len(urls))

    downloaded = 0
    for url in urls:
        content = _try_download(url)
        if content is None:
            # Nothing was fetched for this URL; do not store or queue it.
            continue
        storage.storeContent(url, content)
        tobe.toAnalyse(url)
        downloaded += 1

    tobe.nothingToBeDownloaded()
    # Count only successful downloads so the log is not misleading when
    # some URLs yielded no content.
    logging.info("Downloaded %s urls", downloaded)
def main():
    """Analyse pending URLs, queue them for reporting, then refresh outputs.

    Calls ``disk.getFileLockOrDie`` on ``locks/backend.pid`` first
    (presumably guards against concurrent runs -- confirm in ``disk``).

    URL selection:
      * ``options.fix`` not set: whatever ``tobe.getToBeAnalysed()`` returns.
      * ``options.fix`` set: every stored URL for which
        ``storage.isDownloaded`` is true; ``tobe.nothingToBeReported()`` is
        called, and unless ``options.clean`` is also set the selection is
        passed through ``_skipAnalysed``.

    Each selected URL is analysed, its analysis stored, and the URL passed
    to ``tobe.toReport``. Finally ``tobe.nothingToBeAnalysed()`` is called,
    the count is logged, ``updateReport`` is invoked with
    ``options.fix or options.clean``, and ``warmer.updateWarmPicture()`` runs.
    """
    disk.getFileLockOrDie("locks/backend.pid")
    options, _positional = parseArgs()

    if not options.fix:
        urls = tobe.getToBeAnalysed()
    else:
        urls = [
            candidate
            for candidate in storage.getUrls()
            if storage.isDownloaded(candidate)
        ]
        tobe.nothingToBeReported()
        if not options.clean:
            urls = _skipAnalysed(urls)

    for target in urls:
        result = analyser.analyseUrl(target)
        storage.storeAnalysis(target, result)
        tobe.toReport(target)

    tobe.nothingToBeAnalysed()
    logging.info("Analysed %s urls", len(urls))

    # Either flag requests that the report be rebuilt from a clean start.
    updateReport(options.fix or options.clean)
    warmer.updateWarmPicture()