Пример #1
0
def parse_ips(jobs: Queue):
    """Worker loop that turns queued blocks into ``Cidr`` rows.

    Items are pulled from ``jobs`` until a ``None`` sentinel arrives. For
    each block, ``block.inetnum`` is split on ``" - "`` into a start and an
    end address, the pair is handed to ``iprange_to_cidrs`` and one ``Cidr``
    per returned network is added to the session.

    After every ``COMMIT_COUNT`` blocks the session is committed, closed and
    replaced by a fresh one from ``setup_connection()``. Anything still
    pending when the sentinel arrives is committed before returning.

    Args:
        jobs: queue of block objects exposing an ``inetnum`` attribute,
            terminated by ``None``.
    """
    session = setup_connection()
    blocks_in_batch = 0
    batch_started = time.time()

    while True:
        block = jobs.get()
        if block is None:
            # Sentinel: no more work for this worker.
            break

        bounds = block.inetnum.split(" - ")
        first_ip, last_ip = bounds[0], bounds[1]
        for network in iprange_to_cidrs(first_ip, last_ip):
            session.add(Cidr(cidr=str(network), block=block))

        blocks_in_batch += 1
        if blocks_in_batch % COMMIT_COUNT != 0:
            continue

        # Batch is full: flush it and continue with a brand-new session.
        session.commit()
        session.close()
        session = setup_connection()
        elapsed = round(time.time() - batch_started, 2)
        logger.debug("committed cidrs ({} seconds)".format(elapsed))
        blocks_in_batch = 0
        batch_started = time.time()

    # Flush the final, possibly partial, batch.
    session.commit()
    logger.debug("committed last cidrs")
    session.close()
    logger.debug("{} finished".format(current_process().name))
Пример #2
0
def parse_blocks(jobs: Queue, connection_string: str):
    """Worker loop that converts queued raw blocks into ``Block`` rows.

    Items are pulled from ``jobs`` until a ``None`` sentinel arrives. Each
    item is run through ``parse_property_inetnum`` / ``parse_property`` to
    extract the individual fields, wrapped in a ``Block`` and added to the
    session.

    After every ``COMMIT_COUNT`` blocks the session is committed, closed and
    replaced by a fresh one from ``setup_connection(connection_string)``,
    and a progress line is logged. Anything still pending when the sentinel
    arrives is committed before returning.

    Args:
        jobs: queue of raw blocks, terminated by ``None``.
        connection_string: passed through to ``setup_connection``.
    """
    session = setup_connection(connection_string)

    in_batch = 0
    handled_total = 0
    batch_started = time.time()

    while True:
        block = jobs.get()
        if block is None:
            # Sentinel: no more work for this worker.
            break

        # Keyword arguments are evaluated top to bottom, so the properties
        # are parsed in the same order as they are listed here.
        record = Block(
            inetnum=parse_property_inetnum(block),
            netname=parse_property(block, 'netname'),
            description=parse_property(block, 'descr'),
            country=parse_property(block, 'country'),
            maintained_by=parse_property(block, 'mnt-by'),
            created=parse_property(block, 'created'),
            last_modified=parse_property(block, 'last-modified'),
            source=parse_property(block, 'cust_source'),
        )
        session.add(record)

        in_batch += 1
        handled_total += 1
        if in_batch % COMMIT_COUNT != 0:
            continue

        # Batch is full: flush it and continue with a brand-new session.
        session.commit()
        session.close()
        session = setup_connection(connection_string)

        # Not really accurate at the moment: this worker's own count is
        # scaled by NUM_WORKERS as an estimate, then capped at 100.
        estimate = (handled_total * NUM_WORKERS * 100) / NUM_BLOCKS
        percent = 100 if estimate > 100 else estimate
        elapsed = round(time.time() - batch_started, 2)
        logger.debug(
            'committed {} blocks ({} seconds) {:.1f}% done.'.format(
                in_batch, elapsed, percent))
        in_batch = 0
        batch_started = time.time()

    # Flush the final, possibly partial, batch.
    session.commit()
    logger.debug('committed last blocks')
    session.close()
    logger.debug('{} finished'.format(current_process().name))
Пример #3
0
def main(connection_string):
    """Parse every database dump listed in ``FILELIST`` into the database.

    For each entry the module-global ``CURRENT_FILENAME`` is set to the
    entry name and ``./databases/<entry>`` is looked up. A missing file is
    logged and skipped. Otherwise the file is read with ``read_blocks``,
    ``NUM_WORKERS`` daemon processes running ``parse_blocks`` are started,
    and the blocks plus one ``None`` sentinel per worker are put on a shared
    queue. The function then waits for all workers to exit before moving on
    to the next file.

    Args:
        connection_string: passed to ``setup_connection`` here and forwarded
            to every ``parse_blocks`` worker.
    """
    global CURRENT_FILENAME

    overall_start_time = time.time()

    # The session returned here is not used by this function; the call is
    # kept for whatever set-up ``create_db=True`` triggers (presumably
    # schema creation - confirm in setup_connection). Close it right away
    # so it is not left open for the whole run and across worker start-up.
    session = setup_connection(connection_string, create_db=True)
    session.close()

    for entry in FILELIST:
        CURRENT_FILENAME = entry
        f_name = f"./databases/{entry}"
        if not os.path.exists(f_name):
            logger.info(
                f"File {f_name} not found. Please download using download_dumps.sh"
            )
            continue

        logger.info(f"parsing database file: {f_name}")
        start_time = time.time()
        blocks = read_blocks(f_name)
        logger.info(
            f"database parsing finished: {round(time.time() - start_time, 2)} seconds"
        )

        logger.info('parsing blocks')
        start_time = time.time()

        jobs = Queue()

        # start workers
        workers = []
        logger.debug(f"starting {NUM_WORKERS} processes")
        for _ in range(NUM_WORKERS):
            p = Process(target=parse_blocks,
                        args=(
                            jobs,
                            connection_string,
                        ),
                        daemon=True)
            p.start()
            workers.append(p)

        # add tasks, followed by one stop sentinel per worker
        for b in blocks:
            jobs.put(b)
        for _ in range(NUM_WORKERS):
            jobs.put(None)
        # No more items will be put by this process; wait for the queue's
        # feeder thread to flush everything before joining the workers.
        jobs.close()
        jobs.join_thread()

        # wait to finish
        for p in workers:
            p.join()

        logger.info(
            f"block parsing finished: {round(time.time() - start_time, 2)} seconds"
        )

    CURRENT_FILENAME = "empty"
    logger.info(
        f"script finished: {round(time.time() - overall_start_time, 2)} seconds"
    )
Пример #4
0
def main(connection_string):
    """Parse every database dump listed in ``FILELIST`` into the database.

    For each entry the module-global ``CURRENT_FILENAME`` is set to the
    entry name and ``./databases/<entry>`` is looked up. A missing file is
    logged and skipped. Otherwise the file is read with ``read_blocks``,
    ``NUM_WORKERS`` processes running ``parse_blocks`` are started, and the
    blocks plus one ``None`` sentinel per worker are put on a shared queue.
    The function then waits for all workers to exit before moving on to the
    next file.

    Args:
        connection_string: passed to ``setup_connection`` here and forwarded
            to every ``parse_blocks`` worker.
    """
    global CURRENT_FILENAME

    overall_start_time = time.time()

    # The session returned here is not used by this function; the call is
    # kept for whatever set-up ``create_db=True`` triggers (presumably
    # schema creation - confirm in setup_connection). Close it right away
    # so it is not left open for the whole run and across worker start-up.
    session = setup_connection(connection_string, create_db=True)
    session.close()

    for entry in FILELIST:
        CURRENT_FILENAME = entry
        f_name = "./databases/{}".format(entry)
        if not os.path.exists(f_name):
            logger.info(
                'File {} not found. Please download using download_dumps.sh'.
                format(f_name))
            continue

        logger.info('parsing database file: {}'.format(f_name))
        start_time = time.time()
        blocks = read_blocks(f_name)
        logger.info('database parsing finished: {} seconds'.format(
            round(time.time() - start_time, 2)))

        logger.info('parsing blocks')
        start_time = time.time()

        jobs = Queue()

        # start workers
        workers = []
        logger.debug('starting {} processes'.format(NUM_WORKERS))
        for _ in range(NUM_WORKERS):
            p = Process(target=parse_blocks,
                        args=(
                            jobs,
                            connection_string,
                        ))
            p.start()
            workers.append(p)

        # add tasks, followed by one stop sentinel per worker
        for b in blocks:
            jobs.put(b)
        for _ in range(NUM_WORKERS):
            jobs.put(None)

        # wait to finish
        for p in workers:
            p.join()

        logger.info('block parsing finished: {} seconds'.format(
            round(time.time() - start_time, 2)))

    CURRENT_FILENAME = "empty"
    logger.info('script finished: {} seconds'.format(
        round(time.time() - overall_start_time, 2)))
Пример #5
0
def parse_blocks(jobs: Queue):
    """Worker loop that converts queued raw blocks into ``Block`` rows.

    Items are pulled from ``jobs`` until a ``None`` sentinel arrives. For
    each item the individual properties are extracted with
    ``parse_property`` and stored as a new ``Block`` in the session.

    After every ``COMMIT_COUNT`` blocks the session is committed, closed and
    replaced by a fresh one from ``setup_connection()``. Anything still
    pending when the sentinel arrives is committed before returning.

    Args:
        jobs: queue of raw blocks, terminated by ``None``.
    """
    # (Block keyword, property name) pairs, in the order they are parsed.
    field_map = (
        ("inetnum", "inetnum"),
        ("netname", "netname"),
        ("description", "descr"),
        ("country", "country"),
        ("maintained_by", "mnt-by"),
        ("created", "created"),
        ("last_modified", "last-modified"),
    )

    session = setup_connection()
    in_batch = 0
    batch_started = time.time()

    while True:
        block = jobs.get()
        if block is None:
            # Sentinel: no more work for this worker.
            break

        values = {}
        for attribute, property_name in field_map:
            values[attribute] = parse_property(block, property_name)
        session.add(Block(**values))

        in_batch += 1
        if in_batch % COMMIT_COUNT != 0:
            continue

        # Batch is full: flush it and continue with a brand-new session.
        session.commit()
        session.close()
        session = setup_connection()
        elapsed = round(time.time() - batch_started, 2)
        logger.debug("committed {} blocks ({} seconds)".format(in_batch, elapsed))
        in_batch = 0
        batch_started = time.time()

    # Flush the final, possibly partial, batch.
    session.commit()
    logger.debug("committed last blocks")
    session.close()
    logger.debug("{} finished".format(current_process().name))
Пример #6
0
def main():
    """Run the full import: read the dump, store blocks, then store CIDRs.

    The work happens in two phases, each using ``NUM_WORKERS`` processes
    fed through a queue that is terminated with one ``None`` per worker:

    1. The blocks returned by ``read_blocks()`` are handed to
       ``parse_blocks`` workers.
    2. Every ``Block`` returned by ``session.query(Block)`` is handed to
       ``parse_ips`` workers.

    Timing for each phase and for the whole run is logged.
    """

    def launch_workers(target, queue):
        """Start ``NUM_WORKERS`` processes running ``target(queue)``."""
        logger.debug("starting {} processes".format(NUM_WORKERS))
        started = []
        for _ in range(NUM_WORKERS):
            proc = Process(target=target, args=(queue,))
            proc.start()
            started.append(proc)
        return started

    script_started = time.time()

    session = setup_connection(create_db=True)

    logger.info("parsing ripe database")
    phase_started = time.time()
    blocks = read_blocks()
    logger.info("ripe database parsing finished: {} seconds".format(round(time.time() - phase_started, 2)))

    # ---- phase 1: raw blocks -> Block rows ----
    logger.info("parsing blocks")
    phase_started = time.time()

    jobs = Queue()
    workers = launch_workers(parse_blocks, jobs)

    # add tasks, followed by one stop sentinel per worker
    for raw_block in blocks:
        jobs.put(raw_block)
    for _ in range(NUM_WORKERS):
        jobs.put(None)

    # wait to finish
    for proc in workers:
        proc.join()

    logger.info("block parsing finished: {} seconds".format(round(time.time() - phase_started, 2)))

    # ---- phase 2: stored Block rows -> Cidr rows ----
    logger.info("parsing IPs")

    phase_started = time.time()
    jobs = Queue()
    workers = launch_workers(parse_ips, jobs)

    # add tasks
    logger.debug("populating job queue")
    for stored_block in session.query(Block):
        jobs.put(stored_block)
    session.close()
    for _ in range(NUM_WORKERS):
        jobs.put(None)
    logger.debug("job queue populated")

    # wait to finish
    for proc in workers:
        proc.join()

    logger.info("ip parsing finished: {} seconds".format(round(time.time() - phase_started, 2)))

    logger.info("script finished: {} seconds".format(round(time.time() - script_started, 2)))