def parse_ips(jobs: Queue):
    session = setup_connection()
    counter = 0
    start_time = time.time()
    while True:
        block = jobs.get()
        if block is None:
            break
        # an inetnum looks like "192.0.2.0 - 192.0.2.255"; split it into its endpoints
        ip_start = block.inetnum.split(" - ")[0]
        ip_end = block.inetnum.split(" - ")[1]
        cidrs = iprange_to_cidrs(ip_start, ip_end)
        for c in cidrs:
            session.add(Cidr(cidr=str(c), block=block))
            counter += 1
            # commit in batches to keep transactions small
            if counter % COMMIT_COUNT == 0:
                session.commit()
                session.close()
                session = setup_connection()
                logger.debug("committed cidrs ({} seconds)".format(
                    round(time.time() - start_time, 2)))
                counter = 0
                start_time = time.time()
    session.commit()
    logger.debug("committed last cidrs")
    session.close()
    logger.debug("{} finished".format(current_process().name))
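For context, iprange_to_cidrs is netaddr's helper for turning an arbitrary start/end address pair into the smallest set of CIDR networks that covers it, which is the conversion parse_ips performs per block. A minimal standalone sketch, assuming the inetnum string uses the usual RIPE "start - end" layout:

from netaddr import iprange_to_cidrs

# example inetnum value as it appears in a RIPE dump
inetnum = "192.0.2.0 - 192.0.2.255"

ip_start, ip_end = inetnum.split(" - ")
for cidr in iprange_to_cidrs(ip_start, ip_end):
    print(str(cidr))  # prints 192.0.2.0/24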
def parse_blocks(jobs: Queue, connection_string: str):
    session = setup_connection(connection_string)
    counter = 0
    BLOCKS_DONE = 0
    start_time = time.time()
    while True:
        block = jobs.get()
        if block is None:
            break
        inetnum = parse_property_inetnum(block)
        netname = parse_property(block, 'netname')
        description = parse_property(block, 'descr')
        country = parse_property(block, 'country')
        maintained_by = parse_property(block, 'mnt-by')
        created = parse_property(block, 'created')
        last_modified = parse_property(block, 'last-modified')
        source = parse_property(block, 'cust_source')
        b = Block(inetnum=inetnum, netname=netname, description=description,
                  country=country, maintained_by=maintained_by, created=created,
                  last_modified=last_modified, source=source)
        session.add(b)
        counter += 1
        BLOCKS_DONE += 1
        if counter % COMMIT_COUNT == 0:
            session.commit()
            session.close()
            session = setup_connection(connection_string)
            # not really accurate at the moment
            percent = (BLOCKS_DONE * NUM_WORKERS * 100) / NUM_BLOCKS
            if percent > 100:
                percent = 100
            logger.debug(
                'committed {} blocks ({} seconds) {:.1f}% done.'.format(
                    counter, round(time.time() - start_time, 2), percent))
            counter = 0
            start_time = time.time()
    session.commit()
    logger.debug('committed last blocks')
    session.close()
    logger.debug('{} finished'.format(current_process().name))
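parse_property and parse_property_inetnum are defined elsewhere in the script; a plausible sketch of what they do, assuming each queued block is the raw text of one whois object with "key: value" lines (the regex, the joining of repeated attributes, and the inet6num fallback are assumptions, not the project's actual implementation):

import re

def parse_property(block: str, name: str):
    # collect every "name: value" line; attributes such as descr may repeat
    matches = re.findall(rf'^{re.escape(name)}:\s*(.+)$', block, re.MULTILINE)
    if matches:
        return ' '.join(m.strip() for m in matches)
    return None

def parse_property_inetnum(block: str):
    # IPv4 objects carry "inetnum", IPv6 objects carry "inet6num"
    return parse_property(block, 'inetnum') or parse_property(block, 'inet6num')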
def main(connection_string):
    overall_start_time = time.time()
    session = setup_connection(connection_string, create_db=True)

    for entry in FILELIST:
        global CURRENT_FILENAME
        CURRENT_FILENAME = entry
        f_name = f"./databases/{entry}"
        if os.path.exists(f_name):
            logger.info(f"parsing database file: {f_name}")
            start_time = time.time()
            blocks = read_blocks(f_name)
            logger.info(
                f"database parsing finished: {round(time.time() - start_time, 2)} seconds"
            )

            logger.info('parsing blocks')
            start_time = time.time()

            jobs = Queue()
            workers = []
            # start workers
            logger.debug(f"starting {NUM_WORKERS} processes")
            for w in range(NUM_WORKERS):
                p = Process(target=parse_blocks,
                            args=(jobs, connection_string),
                            daemon=True)
                p.start()
                workers.append(p)

            # add tasks
            for b in blocks:
                jobs.put(b)
            for i in range(NUM_WORKERS):
                jobs.put(None)
            jobs.close()
            jobs.join_thread()

            # wait to finish
            for p in workers:
                p.join()

            logger.info(
                f"block parsing finished: {round(time.time() - start_time, 2)} seconds"
            )
        else:
            logger.info(
                f"File {f_name} not found. Please download using download_dumps.sh"
            )

    CURRENT_FILENAME = "empty"
    logger.info(
        f"script finished: {round(time.time() - overall_start_time, 2)} seconds"
    )
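setup_connection is another helper that is not shown here; a minimal sketch of what it presumably looks like, assuming SQLAlchemy with a declarative Base shared by the ORM models (the function name and the create_db flag come from the script, everything inside the body is an assumption):

from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()  # in the real module this is the base the Block/Cidr models use

def setup_connection(connection_string, create_db=False):
    engine = create_engine(connection_string)
    if create_db:
        # create all tables registered on Base before the first import run
        Base.metadata.create_all(engine)
    return sessionmaker(bind=engine)()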
def main(connection_string):
    overall_start_time = time.time()
    session = setup_connection(connection_string, create_db=True)

    for entry in FILELIST:
        global CURRENT_FILENAME
        CURRENT_FILENAME = entry
        f_name = "./databases/{}".format(entry)
        if os.path.exists(f_name):
            logger.info('parsing database file: {}'.format(f_name))
            start_time = time.time()
            blocks = read_blocks(f_name)
            logger.info('database parsing finished: {} seconds'.format(
                round(time.time() - start_time, 2)))

            logger.info('parsing blocks')
            start_time = time.time()

            jobs = Queue()
            workers = []
            # start workers
            logger.debug('starting {} processes'.format(NUM_WORKERS))
            for w in range(NUM_WORKERS):
                p = Process(target=parse_blocks, args=(jobs, connection_string))
                p.start()
                workers.append(p)

            # add tasks
            for b in blocks:
                jobs.put(b)
            for i in range(NUM_WORKERS):
                jobs.put(None)

            # wait to finish
            for p in workers:
                p.join()

            logger.info('block parsing finished: {} seconds'.format(
                round(time.time() - start_time, 2)))
        else:
            logger.info(
                'File {} not found. Please download using download_dumps.sh'.format(
                    f_name))

    CURRENT_FILENAME = "empty"
    logger.info('script finished: {} seconds'.format(
        round(time.time() - overall_start_time, 2)))
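read_blocks is likewise defined elsewhere; a rough sketch under the assumption that each dump is a (possibly gzipped) text file in which whois objects are separated by blank lines and only inetnum/inet6num objects are kept (both the gzip handling and the filtering are guesses, not the script's actual logic):

import gzip

def read_blocks(filename: str):
    # open .gz dumps transparently, everything else as plain text
    opener = gzip.open if filename.endswith('.gz') else open
    blocks = []
    with opener(filename, mode='rt', encoding='utf-8', errors='ignore') as f:
        # objects in the dump are separated by blank lines
        for raw in f.read().split('\n\n'):
            block = raw.strip()
            if block.startswith(('inetnum:', 'inet6num:')):
                blocks.append(block)
    return blocks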
def parse_blocks(jobs: Queue):
    session = setup_connection()
    counter = 0
    start_time = time.time()
    while True:
        # pull blocks off the queue until the sentinel None arrives
        block = jobs.get()
        if block is None:
            break
        inetnum = parse_property(block, "inetnum")
        netname = parse_property(block, "netname")
        description = parse_property(block, "descr")
        country = parse_property(block, "country")
        maintained_by = parse_property(block, "mnt-by")
        created = parse_property(block, "created")
        last_modified = parse_property(block, "last-modified")
        b = Block(
            inetnum=inetnum,
            netname=netname,
            description=description,
            country=country,
            maintained_by=maintained_by,
            created=created,
            last_modified=last_modified,
        )
        session.add(b)
        counter += 1
        if counter % COMMIT_COUNT == 0:
            session.commit()
            session.close()
            session = setup_connection()
            logger.debug("committed {} blocks ({} seconds)".format(
                counter, round(time.time() - start_time, 2)))
            counter = 0
            start_time = time.time()
    session.commit()
    logger.debug("committed last blocks")
    session.close()
    logger.debug("{} finished".format(current_process().name))
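The Block and Cidr models live in a separate module; a bare-bones sketch that matches the attributes used by both parse_blocks variants and by parse_ips, assuming SQLAlchemy declarative models (table names, column types, and the relationship are assumptions):

from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class Block(Base):
    __tablename__ = 'block'
    id = Column(Integer, primary_key=True)
    inetnum = Column(String)
    netname = Column(String)
    description = Column(String)
    country = Column(String)
    maintained_by = Column(String)
    created = Column(String)
    last_modified = Column(String)
    source = Column(String)  # only populated by the newer parse_blocks

class Cidr(Base):
    __tablename__ = 'cidr'
    id = Column(Integer, primary_key=True)
    cidr = Column(String)
    block_id = Column(Integer, ForeignKey('block.id'))
    block = relationship('Block')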
def main():
    overall_start_time = time.time()
    session = setup_connection(create_db=True)

    logger.info("parsing ripe database")
    start_time = time.time()
    blocks = read_blocks()
    logger.info("ripe database parsing finished: {} seconds".format(
        round(time.time() - start_time, 2)))

    logger.info("parsing blocks")
    start_time = time.time()

    jobs = Queue()
    workers = []
    # start workers
    logger.debug("starting {} processes".format(NUM_WORKERS))
    for w in range(NUM_WORKERS):
        p = Process(target=parse_blocks, args=(jobs,))
        p.start()
        workers.append(p)

    # add tasks
    for b in blocks:
        jobs.put(b)
    for i in range(NUM_WORKERS):
        jobs.put(None)

    # wait to finish
    for p in workers:
        p.join()

    logger.info("block parsing finished: {} seconds".format(
        round(time.time() - start_time, 2)))

    logger.info("parsing IPs")
    start_time = time.time()

    jobs = Queue()
    workers = []
    # start workers
    logger.debug("starting {} processes".format(NUM_WORKERS))
    for w in range(NUM_WORKERS):
        p = Process(target=parse_ips, args=(jobs,))
        p.start()
        workers.append(p)

    # add tasks
    logger.debug("populating job queue")
    for b in session.query(Block):
        jobs.put(b)
    session.close()
    for i in range(NUM_WORKERS):
        jobs.put(None)
    logger.debug("job queue populated")

    # wait to finish
    for p in workers:
        p.join()

    logger.info("ip parsing finished: {} seconds".format(
        round(time.time() - start_time, 2)))

    logger.info("script finished: {} seconds".format(
        round(time.time() - overall_start_time, 2)))
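For completeness, the module-level names all of these functions rely on; the logger name and the concrete values of COMMIT_COUNT, NUM_WORKERS, NUM_BLOCKS, and FILELIST below are placeholders, not the script's real settings:

import logging
import multiprocessing
import os
import time
from multiprocessing import Process, Queue, current_process

from netaddr import iprange_to_cidrs

logger = logging.getLogger('create_db')      # hypothetical logger name

COMMIT_COUNT = 10000                         # assumed commit batch size
NUM_WORKERS = multiprocessing.cpu_count()    # assumed worker count
NUM_BLOCKS = 0                               # presumably set to the parsed block count at runtime
CURRENT_FILENAME = "empty"
FILELIST = ['ripe.db.inetnum.gz']            # example entry; the real list covers all downloaded dumps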