Exemplo n.º 1
0
def ProcessDatabase(options, logger, repoman, repositories_updated):
    """Run the database stage: optional schema init and package reload.

    Args:
        options: settings object; this function reads ``dsn``, ``initdb``,
            ``database``, ``reponames``, ``fetch``, ``update`` and ``parse``.
        logger: logger exposing ``Log()`` and ``GetIndented()``.
        repoman: object whose ``StreamDeserializeMulti()`` feeds packagesets
            to a callback for the repositories named in ``options.reponames``.
        repositories_updated: passed as-is to
            ``Database.MarkRepositoriesUpdated()`` when all of ``--fetch``,
            ``--update`` and ``--parse`` were requested.
    """
    logger.Log('connecting to database')

    sublogger = logger.GetIndented()

    database = Database(options.dsn, readonly=False)

    if options.initdb:
        sublogger.Log('(re)initializing database schema')
        database.CreateSchema()

        sublogger.Log('committing changes')
        database.Commit()

    if options.database:
        sublogger.Log('clearing the database')
        database.Clear()

        pending = []
        pushed_total = 0
        started_at = timer()

        def PushPackageset(packageset):
            """Queue one packageset and flush once 10000 packages pile up."""
            nonlocal pending, pushed_total

            FillPackagesetVersions(packageset)
            pending.extend(packageset)

            # Not enough packages accumulated yet; keep batching.
            if len(pending) < 10000:
                return

            database.AddPackages(pending)
            pushed_total += len(pending)
            # Start a fresh list instead of clearing the one just handed
            # to the database.
            pending = []

            elapsed = timer() - started_at
            sublogger.Log(
                '  pushed {} packages, {:.2f} packages/second'.format(
                    pushed_total, pushed_total / elapsed))

        sublogger.Log('pushing packages to database')
        repoman.StreamDeserializeMulti(processor=PushPackageset,
                                       reponames=options.reponames)

        # Flush whatever did not fill a complete batch.
        database.AddPackages(pending)

        full_cycle = options.fetch and options.update and options.parse
        if not full_cycle:
            sublogger.Log(
                'not recording repo updates, need --fetch --update --parse')
        else:
            sublogger.Log('recording repo updates')
            database.MarkRepositoriesUpdated(repositories_updated)

        sublogger.Log('updating views')
        database.UpdateViews()
        database.ExtractLinks()

        sublogger.Log('updating history')
        database.SnapshotHistory()

        sublogger.Log('committing changes')
        database.Commit()

    logger.Log('database processing complete')
Exemplo n.º 2
0
def LinkProcessorWorker(queue, workerid, options, logger):
    """Check link statuses for packs of URLs taken from ``queue``.

    Opens its own ``Database`` connection using ``options.dsn`` and then
    loops: each pack obtained from ``queue`` is passed to
    ``GetLinkStatuses()`` and every result is stored with
    ``Database.UpdateLinkStatus()``; changes are committed once per pack.

    Args:
        queue: source of work; ``queue.get()`` returns a sequence of URLs,
            or ``None`` as the signal to stop.
        workerid: value used to build the ``workerN: `` log prefix.
        options: settings object; ``dsn``, ``delay`` and ``timeout`` are read.
        logger: logger exposing ``Log()`` and ``GetPrefixed()``.

    Returns:
        None, once the ``None`` sentinel has been received.
    """
    database = Database(options.dsn, readonly=False)

    logger = logger.GetPrefixed('worker{}: '.format(workerid))

    logger.Log('Worker spawned')

    while True:
        pack = queue.get()
        if pack is None:
            logger.Log('Worker exiting')
            return

        # An empty pack has no first/last URL to report and nothing to
        # check; indexing it below would raise IndexError and kill the
        # worker, so just wait for the next pack.
        if not pack:
            continue

        logger.Log('Processing {} urls ({}..{})'.format(
            len(pack), pack[0], pack[-1]))
        for result in GetLinkStatuses(pack,
                                      delay=options.delay,
                                      timeout=options.timeout):
            url, status, redirect, size, location = result
            database.UpdateLinkStatus(url=url,
                                      status=status,
                                      redirect=redirect,
                                      size=size,
                                      location=location)

        # One commit per pack.
        database.Commit()
        logger.Log('Done processing {} urls ({}..{})'.format(
            len(pack), pack[0], pack[-1]))
Exemplo n.º 3
0
def LinkUpdatingWorker(queue, options, logger):
    """Write link status results taken from ``queue`` into the database.

    Opens its own ``Database`` connection using ``options.dsn`` and then
    loops: every entry of each pack obtained from ``queue`` is stored with
    ``Database.UpdateLinkStatus()``; changes are committed once per pack.

    Args:
        queue: source of work; ``queue.get()`` returns a sequence of
            ``(url, status, redirect, size, location)`` tuples, or ``None``
            as the signal to stop.
        options: settings object; only ``dsn`` is read.
        logger: logger exposing ``Log()`` and ``GetPrefixed()``.

    Returns:
        None, once the ``None`` sentinel has been received.
    """
    database = Database(options.dsn, readonly=False)

    logger = logger.GetPrefixed('writer: ')

    logger.Log('Writer spawned')

    while True:
        pack = queue.get()
        if pack is None:
            logger.Log('Writer exiting')
            return

        # An empty pack carries no updates, and the summary log below
        # would raise IndexError on pack[0]; skip it and keep running.
        if not pack:
            continue

        for url, status, redirect, size, location in pack:
            database.UpdateLinkStatus(url=url,
                                      status=status,
                                      redirect=redirect,
                                      size=size,
                                      location=location)

        # One commit per pack.
        database.Commit()
        logger.Log('Updated {} url(s) ({} .. {})'.format(
            len(pack), pack[0][0], pack[-1][0]))