Пример #1
0
def main():
    """Reconcile the stored URLs against the currently valid URLs.

    Calls ``disk.getFileLockOrDie`` on ``locks/backend.pid`` first
    (presumably a single-instance guard -- confirm in ``disk``).  Every
    stored URL that is missing from the valid set is purged from storage,
    and the valid URLs that are not stored yet are passed to
    ``discoverUnknownUrls``.

    Exits the process with status 1 when the valid set is empty; this
    happens before any purge, so an empty result never wipes the storage.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    stored = frozenset(storage.getUrls())
    logging.info("Got %s stored urls", len(stored))

    valid = frozenset(getValidUrls())
    if not valid:
        # Bail out before purging: with no valid URL every stored URL
        # would otherwise be treated as invalid.
        logging.error("No valid URL found")
        sys.exit(1)
    logging.info("Found %s valid urls", len(valid))

    # Stored but no longer valid: remove them.
    for staleUrl in stored.difference(valid):
        storage.purge(staleUrl)

    # Valid but not stored yet: hand them over for discovery.
    discoverUnknownUrls(valid.difference(stored))
Пример #2
0
def main():
    """Re-key the saved dates from the old hash keys to the new hash keys.

    Loads the mapping stored at ``AntiFraud.REAL_DATES_PATH``, walks the
    analyses returned by ``report.fetchReport()`` and, for each analysis
    whose old hash key has a saved date, records that date under the new
    hash key.  When several analyses share a new key, the smallest date
    (by ``min`` comparison) is kept.  The resulting mapping is saved back
    to the same path.

    Raises:
        Exception: if no analysis matched a saved date; nothing is saved
            in that case.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    rekeyed = {}
    legacyDates = load(AntiFraud.REAL_DATES_PATH)
    for analysis in report.fetchReport():
        legacyKey = getOldHashKey(analysis)
        currentKey = AntiFraud._getHashKey(analysis)
        created = legacyDates.get(legacyKey)
        if created is None:
            continue
        # "3000" is the fallback for a key not seen yet; presumably dates
        # are strings that sort below it -- verify against the stored data.
        alreadyKnown = rekeyed.get(currentKey, "3000")
        earliest = min(alreadyKnown, created)
        rekeyed[currentKey] = earliest
        logging.debug("Found date %s for %s", earliest, currentKey)

    if not rekeyed:
        # Refuse to overwrite the saved file with an empty mapping.
        raise Exception("No dates found.")
    save(rekeyed, AntiFraud.REAL_DATES_PATH)
Пример #3
0
def main():
    """Collect URLs batch by batch and register the ones not seen before.

    For each index from 0 up to (but excluding) 10**6,
    ``urlCollector.collectUrls(index)`` is called.  Every returned URL
    that ``storage.isDiscovered`` does not know yet is logged, stored and
    queued through ``tobe.toDownload``.  The loop stops at the first
    index whose batch contains no new URL.

    Logs an error when no URL at all was returned across the batches.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    total = 0
    for idx in xrange(10**6):
        batch = urlCollector.collectUrls(idx)
        total += len(batch)

        sawNew = False
        for url in batch:
            # Checked per URL, right before storing, so a URL repeated
            # within the same batch is only registered once.
            if storage.isDiscovered(url):
                continue
            sawNew = True
            logging.info("Discovered new url: %s", url)
            storage.storeUrl(url)
            tobe.toDownload(url)

        if not sawNew:
            # A batch with nothing new ends the scan.
            break

    if not total:
        logging.error("No valid URL discovered")
Пример #4
0
def main():
    """Download pending URLs and queue the successful ones for analysis.

    With ``options.fix`` set, the work list is every stored URL for which
    ``storage.isDownloaded`` is false; otherwise it is whatever
    ``tobe.getToBeDownloaded()`` returns.  Each URL is fetched with
    ``_try_download``; when that yields content, the content is stored
    and the URL is passed to ``tobe.toAnalyse``.  Afterwards
    ``tobe.nothingToBeDownloaded()`` is called.

    Note: the final "Downloaded" log line reports the size of the work
    list, so it also counts URLs for which ``_try_download`` returned
    ``None``.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    options, _args = parseArgs()
    if not options.fix:
        pending = tobe.getToBeDownloaded()
    else:
        pending = [
            candidate
            for candidate in storage.getUrls()
            if not storage.isDownloaded(candidate)
        ]

    logging.info("Downloading %s urls", len(pending))
    for url in pending:
        payload = _try_download(url)
        if payload is None:
            # No content for this URL: it is neither stored nor queued.
            continue
        storage.storeContent(url, payload)
        tobe.toAnalyse(url)

    tobe.nothingToBeDownloaded()
    logging.info("Downloaded %s urls", len(pending))
Пример #5
0
def main():
    """Analyse pending URLs, then refresh the report and the warm picture.

    With ``options.fix`` set, the work list starts as every stored URL
    for which ``storage.isDownloaded`` is true, ``tobe.nothingToBeReported()``
    is called, and -- unless ``options.clean`` is also set -- the list is
    passed through ``_skipAnalysed``.  Without ``options.fix`` the list is
    whatever ``tobe.getToBeAnalysed()`` returns.

    Each URL is analysed with ``analyser.analyseUrl``, the analysis is
    stored and the URL is passed to ``tobe.toReport``.  Finally
    ``updateReport`` is called with ``options.fix or options.clean`` and
    ``warmer.updateWarmPicture()`` is invoked.
    """
    disk.getFileLockOrDie("locks/backend.pid")

    options, _args = parseArgs()
    if not options.fix:
        targets = tobe.getToBeAnalysed()
    else:
        targets = [
            candidate
            for candidate in storage.getUrls()
            if storage.isDownloaded(candidate)
        ]
        tobe.nothingToBeReported()
        if not options.clean:
            targets = _skipAnalysed(targets)

    for url in targets:
        storage.storeAnalysis(url, analyser.analyseUrl(url))
        tobe.toReport(url)
    tobe.nothingToBeAnalysed()
    logging.info("Analysed %s urls", len(targets))

    # Either flag requests a clean start of the report.
    updateReport(options.fix or options.clean)
    warmer.updateWarmPicture()