def cleanup(force=None):
    """ Delete queries until EXPIRE config is satisfied:
        1 - Delete anything older than EXPIRE_TIME seconds
        2 - Delete the oldest queries until we have at least EXPIRE_SPACE bytes free
    """
    period = parse_duration(Config.get('CLEANUP_PERIOD', 0))
    now = datetime.utcnow()
    global _LAST_CLEANED
    # abort if we already cleaned within the last CLEANUP_PERIOD (unless forced)
    if not force and period and _LAST_CLEANED + period > now:
        Config.logger.debug("Cleaned recently, aborting: {}".format(
            _LAST_CLEANED.strftime(Config.get('DATE_FORMAT'))))
        return
    _LAST_CLEANED = datetime.utcnow()

    from heapq import heapify, heappop
    Config.logger.info("Running Cleanup: {}".format(now.strftime(ISOFORMAT)))
    ids = Query.get_unexpired()
    # min-heap of (mtime, id) tuples: the oldest job is always popped first
    ordered = [(file_modified(Query.job_path_for_id(i)), i) for i in ids]
    heapify(ordered)
    Config.logger.info("Cleaning: {}".format(
        {v: o.strftime(ISOFORMAT) for o, v in ordered}))

    if EXPIRE_TIME:
        # pass 1: expire everything older than EXPIRE_TIME
        expiring = []
        while ordered and ordered[0][0] + EXPIRE_TIME < now:
            _, q_id = heappop(ordered)
            expiring.append(q_id)
        Config.logger.info("Deleting old queries: {}".format(expiring))
        expiring = Query.expire_now(expiring)
        if expiring:
            # expire_now returns the ids it failed to delete
            Config.logger.error("Couldn't delete: {}".format(expiring))

    if EXPIRE_SPACE > 0:
        # pass 2: expire oldest-first until at least EXPIRE_SPACE bytes are free
        free_bytes = spool_space().bytes
        while ordered and free_bytes < EXPIRE_SPACE:
            _, q_id = heappop(ordered)
            Config.logger.info("Deleting for space: {}".format(q_id))
            Query.expire_now(q_id)
            free_bytes = spool_space().bytes
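
# Illustration only (not called by anything here): cleanup() depends on the
# min-heap invariant that a heap of (mtime, id) tuples always exposes the
# oldest job at index 0, letting both expiry passes pop strictly oldest-first.
# A self-contained sketch with made-up job names; _heap_expiry_demo is a
# hypothetical helper, not part of this module.
def _heap_expiry_demo(max_age_hours=4):
    from heapq import heapify, heappop
    from datetime import datetime, timedelta
    now = datetime.utcnow()
    # (mtime, id) pairs, deliberately out of order
    jobs = [(now - timedelta(hours=h), 'job{}'.format(h)) for h in (2, 9, 5)]
    heapify(jobs)                           # oldest mtime is now jobs[0]
    max_age = timedelta(hours=max_age_hours)
    expired = []
    while jobs and jobs[0][0] + max_age < now:
        _, job_id = heappop(jobs)           # always removes the oldest job
        expired.append(job_id)
    return expired                          # ['job9', 'job5']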
def merge(query_tuple):
    """ Runs in the 'io' worker.
        Merges multiple pcap results using wireshark's mergecap tool
    """
    query = Query(qt=query_tuple)
    if not query.load():
        Config.logger.debug("Failed to load [{}]".format(query.id))
    query.progress('merge', 'starting merge', Query.MERGE)
    files = [query.path(f) for f in readdir(query.job_path, endswith='.pcap')]
    if len(files) > 1:
        Config.logger.debug("Merging: {}".format(','.join(files)))
        merged_file = query.path('merged.tmp')
        cmd = ["/usr/sbin/mergecap", "-F", "pcap", "-w", merged_file]
        cmd.extend(files)
        from subprocess import call
        status_code = call(cmd)
        # Cleanup temporary files
        if status_code == 0:
            query.progress('merge', "merge complete, finalizing")
            # make the merged file available (rename is atomic)
            os.rename(merged_file, query.path('{}.pcap'.format(MERGED_NAME)))
            Config.logger.debug("Removing temp files: {}".format(str(files)))
            for item in files:
                os.remove(item)
            query.complete()
        else:
            query.error('merge', "{} returned {}".format(cmd, status_code))
    elif files:
        # a single result needs no merge: just rename it into place
        os.rename(files[0], query.path('{}.pcap'.format(MERGED_NAME)))
        query.complete()
    else:
        query.error('merge', "Nothing to merge ?!?")
    query.save(to_file=True)
    cleanup.apply_async(queue='io')
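
# Standalone sketch of the mergecap invocation merge() performs, with inputs
# and output made explicit. merge_pcaps() is a hypothetical helper, not part
# of this module; the flags (-F pcap to force the output format, -w to name
# the output file) are the same ones merge() passes above.
def merge_pcaps(inputs, output, mergecap='/usr/sbin/mergecap'):
    from subprocess import call
    cmd = [mergecap, '-F', 'pcap', '-w', output]
    cmd.extend(inputs)
    return call(cmd) == 0   # mergecap exits non-zero on failure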
def query_task(query_tuple, headers=None):
    """ Manage the threads that query stenographer.
        Eliminate duplicate queries and ensure order
    """
    query = Query(qt=query_tuple)
    if query.invalid:
        Config.logger.error(
            "Failed to instantiate query from {}".format(query_tuple))
        return
    query.progress('query_task', 'Starting requests')
    Config.logger.debug("query: {}".format(query.id))

    # detect duplicates, and update their timestamps to forestall deletion
    from os import mkdir
    try:
        # mkdir is atomic and throws OSError if the directory exists
        mkdir(query.job_path, 0o750)
    except OSError:
        # NOTE: python 3 throws the subclass: FileExistsError
        Config.logger.info("query: duplicate request {}".format(query.id))
        os.utime(query.job_path, None)
        return

    # Query each instance concurrently
    from threading import Thread
    threads = []
    for instance in _INSTANCES:
        thread = Thread(target=_requester, args=(query, instance, headers))
        thread.start()
        threads.append(thread)

    # Wait until all threads complete, saving progress between polls.
    alive = True
    while alive:
        query.save()
        alive = False
        for thread in threads:
            thread.join(1.0)
            alive = alive or thread.is_alive()

    if query.errors:
        query.error('query_task', "stenographer queries complete", Query.FAIL)
    elif query.successes:
        query.progress('query_task', "stenographer queries complete",
                       Query.RECEIVED)
    else:
        query.progress('query_task',
                       'stenographer queries completed. No packets returned',
                       Query.SUCCESS)
    query.save()
    if query.successes:
        merge.apply_async(queue='io', kwargs={'query_tuple': query.tupify()})
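
# Sketch of the duplicate-suppression trick query_task() relies on: mkdir()
# is atomic, so exactly one concurrent caller creates the job directory and
# proceeds; the rest hit OSError (FileExistsError on Python 3) and merely
# refresh the directory's mtime, which defers cleanup(). claim_job() is a
# hypothetical name, not part of this module.
def claim_job(job_path):
    import os
    try:
        os.mkdir(job_path, 0o750)   # atomic: only one caller can succeed
        return True                 # we own the job; run the queries
    except OSError:
        os.utime(job_path, None)    # duplicate: bump mtime so cleanup waits
        return False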