def test_model(testing_set):
    """ Test the LSTM model."""
    # For reporting current metrics
    freq_s = options.status_interval * 60
    last_s = datetime.now()
    res = [0.0] * len(model.metrics_names)
    batches = 0
    num_samples = len(testing_set)

    for status in map_to_model(testing_set, model.test_on_batch):
        if status is None:
            break
        for stat in range(len(status)):
            res[stat] += status[stat]
        batches += 1
        # Print current metrics every status interval
        if (datetime.now() - last_s).total_seconds() > freq_s:
            c_metrics = [status / batches for status in res]
            c_metrics_str = ', '.join([str(model.metrics_names[x]) + ' ' + '%.12f' % (c_metrics[x])
                                       for x in range(len(c_metrics))])
            c_metrics_str += ', progress %.4f' % (float(generator.fin_tasks.value) / float(num_samples))
            logger.log_info(module_name, 'Status: ' + c_metrics_str)
            last_s = datetime.now()

    if batches < 1:
        logger.log_warning(module_name, 'Testing set did not generate a full batch of data, cannot test')
        return

    for stat in range(len(res)):
        res[stat] /= batches
    logger.log_info(module_name, 'Results: ' + ', '.join([str(model.metrics_names[x]) + ' ' + str(res[x])
                                                          for x in range(len(res))]))
def train_model(training_set):
    """ Trains the LSTM model."""
    start_time = datetime.now()
    # Checkpointing for saving model weights
    freq_c = options.checkpoint_interval * 60
    last_c = datetime.now()
    # For reporting current metrics
    freq_s = options.status_interval * 60
    last_s = datetime.now()
    res = [0.0] * len(model.metrics_names)
    batches = 0

    for status in map_to_model(training_set, model.train_on_batch):
        if status is None:
            break
        for stat in range(len(status)):
            res[stat] += status[stat]
        batches += 1
        # Print current metrics every status interval
        if (datetime.now() - last_s).total_seconds() > freq_s:
            c_metrics = [status / batches for status in res]
            c_metrics_str = ', '.join([
                str(model.metrics_names[x]) + ' ' + str(c_metrics[x])
                for x in range(len(c_metrics))
            ])
            logger.log_info(MODULE_NAME, 'Status: ' + c_metrics_str)
            last_s = datetime.now()
        # Save current weights at the user specified frequency
        if freq_c > 0 and (datetime.now() - last_c).total_seconds() > freq_c:
            logger.log_debug(MODULE_NAME, 'Checkpointing weights')
            try:
                model.save_weights(options.save_weights)
            except:
                generator.stop_generator(10)
                clean_exit(
                    EXIT_RUNTIME_ERROR,
                    "Failed to save LSTM weights:\n" + str(traceback.format_exc()))
            last_c = datetime.now()

    if batches < 1:
        logger.log_warning(
            MODULE_NAME,
            'Training set did not generate a full batch of data, cannot train')
        return

    for stat in range(len(res)):
        res[stat] /= batches
    logger.log_info(
        MODULE_NAME,
        'Results: ' + ', '.join([
            str(model.metrics_names[x]) + ' ' + str(res[x])
            for x in range(len(res))
        ]))
    logger.log_debug(
        MODULE_NAME,
        'Training finished in ' + str(datetime.now() - start_time))

    return res[0]  # Average loss
def add_cache(hash, acc, con):
    if not os.path.isdir(CACHE_DIR):
        logger.log_warning(module_name, "Cache directory does not exist, cannot update it")
        return

    ofp = os.path.join(CACHE_DIR, hash)
    data = (acc, con)
    if not os.path.exists(ofp):
        with open(ofp, 'wb') as ofile:
            pickle.dump(data, ofile)
def get_cache(hash):
    if not is_cached(hash):
        return None

    ofp = os.path.join(CACHE_DIR, hash)
    with open(ofp, 'rb') as ifile:
        try:
            return pickle.load(ifile)
        except Exception as ex:
            logger.log_warning(module_name, "Failed to access cache: " + str(ex))
            return None
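# Minimal sketch (assumption): get_cache() relies on an is_cached() helper that is not
# shown in this collection. Something like the following would be consistent with how
# add_cache() and get_cache() use CACHE_DIR; it is illustrative, not the original helper.
def is_cached(hash):
    return os.path.isdir(CACHE_DIR) and os.path.isfile(os.path.join(CACHE_DIR, hash))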
def load_file(file_path):
    file_suffix = os.path.splitext(file_path)[1].lower()
    if file_suffix == '.json':
        return load_json_file(file_path)
    elif file_suffix in ['.yaml', '.yml']:
        return load_yaml_file(file_path)
    elif file_suffix == '.csv':
        return load_csv_file(file_path)
    else:
        err_msg = u"Unsupported file format: {}".format(file_path)
        logger.log_warning(err_msg)
        return []
def warn_and_debug(has_warned, warning, debug):
    """ Prints a debug message and also generates a generic warning message if one hasn't
    been produced before. The point is to record that problems occurred without spamming
    the warning log level.
    """
    if not has_warned:
        logger.log_warning(module_name, warning)
        has_warned = True

    logger.log_debug(module_name, debug)
    return has_warned
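# Usage sketch (assumption, not original code): the caller must keep the returned flag,
# otherwise the generic warning is emitted on every failure instead of once. 'records'
# and 'parse' below are hypothetical names used only for illustration.
has_warned = False
for record in records:
    if not parse(record):
        has_warned = warn_and_debug(has_warned,
                                    'Non-critical parsing problems, see debug level for details',
                                    'Failed to parse record: ' + repr(record))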
def lookup_bin(name):
    """ Looks up the path to a bin using Linux environment variables.

    Not as robust as a program like which, but should be good enough.
    """
    logger.log_debug(module_name, 'PATH = ' + str(os.environ['PATH']))
    path_dirs = os.environ['PATH'].split(':')
    for path_dir in path_dirs:
        candidate = os.path.join(path_dir, name)
        if os.path.isfile(candidate):
            return candidate

    logger.log_warning(module_name, 'Failed to find ' + str(name))
    return ''  # Failed to find a match
def eval_worker_loop(temp_dir, sample):
    o_filename = sample['label'] + '-' + path.basename(sample['base_dir']) + '.gz'
    o_filepath = path.join(temp_dir, o_filename)
    logger.log_debug(module_name, 'Writing to ' + o_filepath)

    with gzip.open(o_filepath, 'wt') as ofile:
        if options.preprocess:
            gen_func = reader.read_preprocessed
        else:
            gen_func = reader.disasm_pt_file

        iqueue, oqueue = generator.start_generator(1, gen_func, options.queue_size,
                                                   options.seq_len, redis_info)

        if options.preprocess:
            iqueue.put((None, sample['parsed_filepath']))
        else:
            sample_memory = reader.read_memory_file(sample['mapping_filepath'])
            if sample_memory is None:
                logger.log_warning(module_name, 'Failed to parse memory file, skipping')
                generator.stop_generator(10)
                return
            iqueue.put((None, sample['trace_filepath'], bin_dirpath, sample_memory))

        while True:
            try:
                res = oqueue.get(True, 5)
            except:
                in_service = generator.get_in_service()
                if in_service == 0:
                    break
                else:
                    logger.log_debug(module_name, str(in_service) + ' workers still working on jobs')
                    continue

            xs = res[1][1:]
            ys = res[1][0] % options.max_classes
            predict, conf = predict_prob(xs, ys)
            corr = int(predict == ys)
            ofile.write(str(corr) + ',' + str(predict) + ',' + str(conf) + ',' + str(ys) + "\n")

    generator.stop_generator(10)
def map_to_model(samples, f):
    """ A helper function because train_on_batch() and test_on_batch() are so similar."""
    random.shuffle(samples)

    # There's no point spinning up more worker threads than there are samples
    threads = min(options.threads, len(samples))

    if options.preprocess:
        gen_func = reader.read_preprocessed
    else:
        gen_func = reader.disasm_pt_file

    # When you gonna fire it up? When you gonna fire it up?
    iqueue, oqueue = generator.start_generator(threads, gen_func, options.queue_size,
                                               options.seq_len, options.embedding_in_dim,
                                               options.max_classes, options.batch_size)

    for sample in samples:
        if options.preprocess:
            iqueue.put((None, sample['parsed_filepath']))
        else:
            sample_memory = reader.read_memory_file(sample['mapping_filepath'])
            if sample_memory is None:
                logger.log_warning(module_name, 'Failed to parse memory file, skipping')
                continue
            iqueue.put((None, sample['trace_filepath'], options.bin_dir, sample_memory,
                        options.timeout))

    # Get parsed sequences and feed them to the LSTM model
    batch_cnt = 0
    while True:
        try:
            res = oqueue.get(True, 5)
        except queue.Empty:
            in_service = generator.get_in_service()
            if in_service == 0:
                break
            else:
                logger.log_debug(module_name, str(in_service) + ' workers still working on jobs')
                continue

        yield f(res[1], res[2])
        batch_cnt += 1

    logger.log_info(module_name, "Processed " + str(batch_cnt) + " batches, "
                    + str(batch_cnt * options.batch_size) + " samples")

    generator.stop_generator(10)

    # End of generator
    while True:
        yield None
def insert(seq, dst):
    seq_len = len(seq)
    if seq_len > max_seq:
        logger.log_warning(module_name, 'Tried to insert sequence that exceeds max sequence length')
        return

    src_key = str(seq)
    if src_key in edges[seq_len] and dst in edges[seq_len][src_key]:
        edges[seq_len][src_key][dst] += 1
    elif src_key in edges[seq_len]:
        edges[seq_len][src_key][dst] = 1
    else:
        edges[seq_len][src_key] = dict()
        edges[seq_len][src_key][dst] = 1
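# Assumed module state (illustrative sketch, not part of the original source): insert()
# relies on a global max_seq and an edges structure indexed by sequence length, where each
# slot maps a stringified source sequence to a counter of destination IDs. One way to
# initialize it consistently with the code above:
max_seq = 32  # hypothetical maximum sequence length
edges = [dict() for _ in range(max_seq + 1)]  # edges[seq_len][str(seq)][dst] -> count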
def kodi_json_request(params):
    data = json.dumps(params)
    request = xbmc.executeJSONRPC(data)

    try:
        response = json.loads(request)
    except UnicodeDecodeError:
        response = json.loads(request.decode('utf-8', 'ignore'))

    try:
        if 'result' in response:
            return response['result']
        return None
    except KeyError:
        logger.log_warning("[%s] %s" % (params['method'], response['error']['message']))
        return None
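# Usage sketch (assumption): a JSON-RPC payload passed to kodi_json_request() would look
# something like this; Player.GetActivePlayers is a standard Kodi JSON-RPC method used
# here purely for illustration.
players = kodi_json_request({
    'jsonrpc': '2.0',
    'method': 'Player.GetActivePlayers',
    'id': 1,
})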
def load_sets():
    if not path.isfile(options.input_sets):
        clean_exit(EXIT_INVALID_ARGS, "Cannot find file " + str(options.input_sets))

    set_key = None
    try:
        with open(options.input_sets, 'r') as ifile:
            for line in ifile:
                line = line.rstrip()
                if len(line) < 1:
                    continue
                if line[0] == '[':
                    set_key = line[1:-1]
                else:
                    # Line should be the path to a trace directory
                    if root_dir not in line:
                        logger.log_warning(module_name, 'Input data specified with -i must be in '
                                           + str(root_dir) + ', skipping')
                        continue
                    if not path.isdir(line):
                        logger.log_warning(module_name, 'Cannot find directory ' + str(line)
                                           + ' to load data from, skipping')
                        continue
                    matches = [record for record in fs if record['base_dir'] == line]
                    if len(matches) < 1:
                        logger.log_warning(module_name, 'Could not find data in directory '
                                           + str(line) + ', skipping')
                        continue
                    sets_meta[set_key].append(matches[0])
    except:
        clean_exit(EXIT_RUNTIME_ERROR, "Failed to load sets from " + str(options.input_sets))
def map_to_model(samples, f):
    """ A helper function because train_on_batch() and test_on_batch() are so similar."""
    global redis_info
    global oqueue

    random.shuffle(samples)

    # There's no point spinning up more worker threads than there are samples
    threads = min(options.threads, len(samples))

    if options.preprocess:
        gen_func = reader.read_preprocessed
    else:
        gen_func = reader.disasm_pt_file

    # When you gonna fire it up? When you gonna fire it up?
    iqueue, oqueue = generator.start_generator(threads, gen_func, options.queue_size,
                                               options.seq_len, redis_info)

    for sample in samples:
        if options.preprocess:
            iqueue.put((None, sample['parsed_filepath']))
        else:
            sample_memory = reader.read_memory_file(sample['mapping_filepath'])
            if sample_memory is None:
                logger.log_warning(module_name, 'Failed to parse memory file, skipping')
                continue
            iqueue.put((None, sample['trace_filepath'], bin_dirpath, sample_memory))

    ncpu = cpu_count()
    workers = Pool(ncpu)
    res = workers.map(worker_loop, [f] * ncpu)

    generator.stop_generator(10)

    return sum(res) / len(res)
def disasm_timeout(proc):
    """ Terminates the ptxed proc and logs a warning message. """
    logger.log_warning(module_name, "Timeout reached, terminating early")
    proc.kill()
    DISASM_TIMEOUT.set()
def test_log_warning(caplog):
    """Test correct warning message is logged"""
    msg = 'Test warn'
    logger.log_warning(msg)
    assert msg in caplog.text
    assert 'WARNING' in caplog.text
def main():
    # Parse input arguments
    parser = OptionParser(usage='Usage: %prog [options] trace_directory bin_directory')
    parser.add_option('-f', '--force', action='store_true',
                      help='If a complete or partial output already exists, overwrite it.')
    parser.add_option('-t', '--timeout', action='store', type='int', default=None,
                      help='Max seconds to run before quitting (default: infinite).')
    parser.add_option('-p', '--no-partial', action='store_true',
                      help='If timeout is reached, do not save the partially parsed trace.')

    options, args = parser.parse_args()

    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    data_dir = args[0]
    bin_dir = args[1]

    logger.log_start(logging.INFO)

    # Input validation
    if not os.path.isdir(data_dir):
        logger.log_error(module_name, data_dir + ' is not a directory')
        logger.log_stop()
        sys.exit(1)

    if not os.path.isdir(bin_dir):
        logger.log_error(module_name, bin_dir + ' is not a directory')
        logger.log_stop()
        sys.exit(1)

    if options.timeout is None and options.no_partial:
        logger.log_warning(module_name, "Setting --no-partial without --timeout does nothing")

    # Make sure all the expected files are there
    mem_file = None
    trace_file = None
    files = os.listdir(data_dir)
    for file in files:
        if file == 'mapping.txt' or file == 'mapping.txt.gz':
            mem_file = os.path.join(data_dir, file)
        elif file == 'trace_0' or file == 'trace_0.gz':
            trace_file = os.path.join(data_dir, file)

    if mem_file is None:
        logger.log_error(module_name, 'Could not find mapping.txt or mapping.txt.gz in ' + data_dir)
        logger.log_stop()
        sys.exit(1)

    if trace_file is None:
        logger.log_error(module_name, 'Could not find trace_0 or trace_0.gz in ' + data_dir)
        logger.log_stop()
        sys.exit(1)

    # Parse the memory file
    mem_map = reader.read_memory_file(mem_file)
    if mem_map is None:
        logger.log_error(module_name, 'Failed to parse memory mapping file')
        logger.log_stop()
        sys.exit(1)

    # We're ready to parse the trace
    o_filepath = os.path.join(data_dir, 'trace_parsed.gz')

    if os.path.isfile(o_filepath) and not options.force:
        logger.log_error(module_name, 'Preprocess file already exists')
        logger.log_stop()
        sys.exit(1)

    if os.path.isfile(o_filepath + '.part') and not options.force:
        logger.log_error(module_name, 'Partial preprocess file already exists')
        logger.log_stop()
        sys.exit(1)

    entries = 0
    with gzip.open(o_filepath + '.part', 'wb') as ofile:
        for instr in reader.disasm_pt_file(trace_file, bin_dir, mem_map, options.timeout):
            if instr is None:
                break
            ofile.write(pack_instr(instr))
            entries += 1

    if reader.DISASM_TIMEOUT.is_set() and options.no_partial:
        logger.log_info(module_name, "Deleting partial trace")
        os.remove(o_filepath + '.part')
    elif entries > 0:
        os.rename(o_filepath + '.part', o_filepath)
    else:
        logger.log_error(module_name, 'No output produced, empty file')
        os.remove(o_filepath + '.part')

    logger.log_stop()
# if/elif chain to determine operations ##############################################################
# The op codes are as follows:
#   create_route:          post_users
#   add_activity_route:    put_activity_by_activity_name
#   get_id_route:          get_user_by_id
#   get_user_route:        get_users
#   get_name_route:        get_user_by_name
#   delete_id_route:       delete_user_by_id
#   delete_name_route:     delete_user_by_name
#   delete_activity_route: delete_activity_by_activity_id

duplicated_flag = 0  # The duplicated_flag is used to skip the if blocks for a duplicated operation

# This part of the code tests for duplicates: if the msg_id is already in the dictionary,
# skip the if blocks and send back the original response.
for msgID in msgIDList:
    if msgID == request['msg_id']:
        log_warning("REPLICATED msg_in: " + request['msg_id'])
        qout.write(msgIDList[msgID])
        duplicated_flag = 1
        break

if duplicated_flag == 1:
    continue

if request['method'] is None:
    log_fail("INVALID METHOD ID " + request['msg_id'])
    continue

# Print the opnum of the request
log_back("###### EXPECTED OP# : " + str(nextTopNum))
log_back("!!!!!! REQUEST OP# : " + str(request['opnum']))

msg_out = Message()      # Create an empty message object for msg_out
key = request['msg_id']  # Assign the msg_id to key
def eval_model(eval_set):
    """ Evaluate the LSTM model."""
    random.shuffle(eval_set)

    # There's no point spinning up more worker threads than there are samples
    threads = min(options.threads, len(eval_set))

    if options.eval_dir is None:
        eval_dir = tempfile.mkdtemp(suffix='-lstm-pt')
    else:
        if not path.exists(options.eval_dir):
            mkdir(options.eval_dir)
        eval_dir = options.eval_dir
    logger.log_info(module_name, 'Evaluation results will be written to ' + eval_dir)

    if options.preprocess:
        gen_func = reader.read_preprocessed
    else:
        gen_func = reader.disasm_pt_file

    iqueue, oqueue = generator.start_generator(threads, gen_func, options.queue_size,
                                               options.seq_len, options.embedding_in_dim,
                                               options.max_classes, options.batch_size)

    for sample in eval_set:
        o_filename = sample['label'] + '-' + path.basename(sample['base_dir']) + '.gz'
        o_filepath = path.join(eval_dir, o_filename)
        EVAL_WRITE_LOCKS[o_filepath] = threading.Lock()
        if options.preprocess:
            iqueue.put((o_filepath, sample['parsed_filepath']))
        else:
            sample_memory = reader.read_memory_file(sample['mapping_filepath'])
            if sample_memory is None:
                logger.log_warning(module_name, 'Failed to parse memory file, skipping')
                continue
            iqueue.put((o_filepath, sample['trace_filepath'], options.bin_dir, sample_memory,
                        options.timeout))

    # Use threads instead of processes to handle and write the prediction
    # results because the I/O and numpy crunching release the GIL.
    EVAL_PRED_DONE.clear()
    wqueue = queue.Queue(options.queue_size)
    workers = list()
    for id in range(threads):
        worker = threading.Thread(target=eval_worker, args=(wqueue,))
        worker.daemon = True
        worker.start()
        workers.append(worker)

    while True:
        try:
            res = oqueue.get(True, 5)
        except queue.Empty:
            in_service = generator.get_in_service()
            if in_service == 0:
                break
            else:
                logger.log_debug(module_name, str(in_service) + ' workers still working on jobs')
                continue

        wqueue.put([res, model.predict_on_batch(res[1])])

    EVAL_PRED_DONE.set()
    logger.log_debug(module_name, "Waiting for eval workers to terminate")
    for worker in workers:
        worker.join()
    logger.log_debug(module_name, "All eval workers are done")

    generator.stop_generator(10)
def init_cache():
    if not os.path.isdir(CACHE_DIR):
        try:
            os.makedirs(CACHE_DIR)
        except Exception as ex:
            logger.log_warning(module_name, "Failed to create cache directory: " + str(ex))
def train_model(training_set):
    """ Trains the LSTM model."""
    start_time = datetime.now()
    # Checkpointing for saving model weights
    freq_c = options.checkpoint_interval * 60
    last_c = datetime.now()
    last_b = 10000  # Best (lowest) loss seen at a checkpoint so far
    # For reporting current metrics
    freq_s = options.status_interval * 60
    last_s = datetime.now()
    res = [0.0] * len(model.metrics_names)
    batches = 0
    num_samples = len(training_set)

    for status in map_to_model(training_set, model.train_on_batch):
        if status is None:
            break
        for stat in range(len(status)):
            res[stat] += status[stat]
        batches += 1
        # Print current metrics every status interval
        if (datetime.now() - last_s).total_seconds() > freq_s:
            c_metrics = [status / batches for status in res]
            c_metrics_str = ', '.join([str(model.metrics_names[x]) + ' ' + '%.12f' % (c_metrics[x])
                                       for x in range(len(c_metrics))])
            c_metrics_str += ', progress %.4f' % (float(generator.fin_tasks.value) / float(num_samples))
            logger.log_info(module_name, 'Status: ' + c_metrics_str)
            last_s = datetime.now()
        # Save current weights at the user specified frequency
        if freq_c > 0 and (datetime.now() - last_c).total_seconds() > freq_c:
            logger.log_debug(module_name, 'Checkpointing weights')
            c_metrics = [status / batches for status in res]
            if not options.checkpoint_best or c_metrics[0] < last_b:
                try:
                    model.save_weights(options.save_weights)
                    if options.multi_gpu is not None:
                        template.save_weights(options.save_weights + '.single')
                except:
                    generator.stop_generator(10)
                    clean_exit(EXIT_RUNTIME_ERROR,
                               "Failed to save LSTM weights:\n" + str(traceback.format_exc()))
            if options.checkpoint_es and c_metrics[0] > last_b:
                logger.log_info(module_name, 'Loss did not improve between checkpoints, '
                                             'early stopping and restoring last weights')
                generator.stop_generator(10)
                try:
                    model.load_weights(options.save_weights)
                except:
                    clean_exit(EXIT_RUNTIME_ERROR,
                               "Failed to load LSTM weights:\n" + str(traceback.format_exc()))
                return
            last_b = c_metrics[0]
            last_c = datetime.now()

    if batches < 1:
        logger.log_warning(module_name, 'Training set did not generate a full batch of data, cannot train')
        return

    for stat in range(len(res)):
        res[stat] /= batches
    logger.log_info(module_name, 'Results: ' + ', '.join([str(model.metrics_names[x]) + ' ' + str(res[x])
                                                          for x in range(len(res))]))
    logger.log_debug(module_name, 'Training finished in ' + str(datetime.now() - start_time))

    return res[0]  # Average loss
def add_filter(key):
    """ Adds a filter from the available_filters dictionary. """
    if key in available_filters:
        enabled_filters.append(available_filters[key])
    else:
        logger.log_warning(module_name, str(key) + " not in available filters")
def disasm_pt_file(trace_path, bin_path, mem_mapping, timeout=None):
    """ Disassembles a PT trace into instructions and yields tuples.

    Each tuple contains the following elements:

        Source BBID -- the BB from which a transfer is happening.
        Target BBID -- the BB the transfer ends up in.
        Transfer Instruction -- the instruction that causes the transfer (e.g., ret).
        Full Instruction -- An array containing the parts of the full instruction
            (e.g., ['call', 'ptr', 'eax']).
        Full Instruction Size -- The length of the previously mentioned array.

    Note, the reason why Transfer Instruction and Full Instruction are both in the tuple
    despite being redundant is for backwards compatibility with older versions of the code.

    Keyword arguments:
    trace_path -- The filepath to a raw PT trace (may be gzipped).
    bin_path -- The path to a directory containing binaries for use by the disassembler.
    mem_mapping -- A linear array of tuples in the form (start_address, end_address, source_file).
    timeout -- If not None, the max number of seconds to disasm for. Event DISASM_TIMEOUT is
        set if the timeout is reached.

    Yields:
    The tuples described above until EoF is reached, after which None is yielded.
    """
    global mem_map
    mem_map = mem_mapping

    # Some regular expressions
    re_block = re.compile(r'\[block\]')

    # Input validation
    ptxed_path = utils.lookup_bin('ptxed')
    if ptxed_path == '':
        logger.log_error(module_name, 'ptxed not found, cannot read ' + str(trace_path))
        return

    if not path.isfile(trace_path):
        logger.log_error(module_name, str(trace_path) + " does not exist or is not a file")
        return

    if not path.isdir(bin_path):
        logger.log_error(module_name, str(bin_path) + " does not exist or is not a directory")
        return

    temp_dir = tempfile.mkdtemp()

    # If the file is gzipped, it must be decompressed first
    if trace_path[-3:] == '.gz':
        ifilepath = path.join(temp_dir, 'pt_data')
        logger.log_debug(module_name, 'Decompressing ' + str(trace_path) + ' into ' + str(ifilepath))
        start_time = datetime.now()
        with gzip.open(trace_path, 'rb') as cfile:
            with open(ifilepath, 'wb') as ofile:
                ofile.write(cfile.read())
        delta_time = datetime.now() - start_time
        logger.log_debug(module_name, 'Decompressing ' + str(trace_path) + ' completed in ' + str(delta_time))
    else:
        ifilepath = trace_path

    # Use ptxed to generate tuples
    command = [ptxed_path, '--block:show-blocks']
    for map in mem_map:
        start_addr = hex(map[0])
        filename = path.basename(map[2].replace("\\", '/'))
        binpath = path.join(bin_path, filename)
        if not path.isfile(binpath):
            logger.log_warning(module_name, binpath + ' does not exist')
            continue
        command.append('--raw')
        command.append(binpath + ':' + start_addr)
    command.append('--pt')
    command.append(ifilepath)

    logger.log_debug(module_name, 'Running ' + ' '.join(command))
    start_time = datetime.now()

    warning_msg = 'Non-critical problems while disasm trace, see debug level (-l) for more info'
    has_warned = False

    count = 0
    last_bbid = 0
    last_instr = None
    ptxed = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1)

    DISASM_TIMEOUT.clear()
    if timeout is not None:
        watchdog = Timer(timeout, disasm_timeout, args=[ptxed])
        watchdog.start()

    for line in ptxed.stdout:
        line = line.decode()
        if re_block.match(line):
            try:
                head, start, end, instr = line.split(' ', 3)
            except ValueError:
                break  # Can happen if the watchdog kills ptxed
            if last_instr is None:
                # The first basic block doesn't have a previous block, skip it
                last_instr = instr
                continue
            # Extract the type from the previous instruction (e.g., ret)
            src_type = last_instr.split(' ')[2:]
            # Convert the target address into a BBID
            dst_addr = int(start, 16)
            dst_bbid = get_bbid(dst_addr)
            if dst_bbid is not None:
                yield (last_bbid, dst_bbid, src_type[0], src_type, len(src_type))
                last_bbid = dst_bbid
                count += 1
            else:
                has_warned = warn_and_debug(has_warned, warning_msg,
                                            'Cannot find BBID for address ' + hex(dst_addr))
            last_instr = instr
            continue

    if timeout is not None:
        watchdog.cancel()

    delta_time = datetime.now() - start_time
    logger.log_info(module_name, 'Generated ' + str(count) + ' entries in ' + str(delta_time))

    # Cleanup temp dir
    shutil.rmtree(temp_dir)

    # End of generator
    while True:
        yield None
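# Usage sketch (assumption, mirroring how the preprocess-style callers above consume this
# generator): iterate until None is yielded, which signals the end of the trace. The file
# and directory names are placeholders.
for entry in disasm_pt_file('trace_0.gz', '/path/to/bins', mem_map, timeout=3600):
    if entry is None:
        break
    src_bbid, dst_bbid, xfer_instr, full_instr, full_instr_size = entry
    # ... feed the tuple to downstream processing ...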
def parse_pt_dir(root):
    """ Parses a directory containing PT traces and meta data.

    This parser expects the following layout:

        root/
            <pdf_hash>/
                info.txt
                mapping.txt[.gz]
                trace_0[.gz]
                [trace_parsed.gz]
                [report.json.gz]
                [...]

    info.txt should contain two lines: the original filename and the ground truth label.
    mapping.txt (or optionally mapping.txt.gz if gzip compression is used) is the output
    of the volatility plugin psscan. trace_0 (or optionally trace_0.gz if gzip compression
    is used) is a raw PT trace. trace_parsed.gz is an optional file generated using
    preprocess.py. report.json.gz is an optional Cuckoo report file used by syscall.py.

    Returns:
    An array where each item contains the following information in dictionary form:
    directory, trace filepath, memory mapping filepath, info filepath, original filename,
    and label. Upon error, None is returned.
    """
    if not path.isdir(root):
        logger.log_error(module_name, str(root) + ' is not a directory')
        return None

    res = []
    entries = listdir(root)
    for entry in entries:
        entry_info = {'base_dir': path.join(root, entry)}
        if not path.isdir(entry_info['base_dir']):
            logger.log_debug(module_name, 'Skipping ' + str(entry) + ' because it is not a directory')
            continue

        entry_contents = listdir(entry_info['base_dir'])
        for file in entry_contents:
            if file == 'info.txt':
                entry_info['info_filepath'] = path.join(entry_info['base_dir'], file)
                with open(entry_info['info_filepath'], 'r') as ifile:
                    entry_info['original_filename'] = ifile.readline().strip()
                    entry_info['label'] = ifile.readline().strip()
            elif file == 'mapping.txt' or file == 'mapping.txt.gz':
                entry_info['mapping_filepath'] = path.join(entry_info['base_dir'], file)
            elif file == 'trace_0' or file == 'trace_0.gz':
                entry_info['trace_filepath'] = path.join(entry_info['base_dir'], file)
            elif file == 'trace_parsed.gz':
                entry_info['parsed_filepath'] = path.join(entry_info['base_dir'], file)
            elif file == 'report.json.gz':
                entry_info['cuckoo_report'] = path.join(entry_info['base_dir'], file)

        if len(entry_info.keys()) < 6:
            logger.log_warning(module_name, 'Could not find all the necessary files in '
                               + str(root) + ', skipping')
            logger.log_debug(module_name, 'Found keys: ' + str(entry_info.keys()))
        else:
            logger.log_debug(module_name, 'Adding entry with keys: ' + str(entry_info.keys()))
            res.append(entry_info)

    return res
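# Usage sketch (assumption): typical consumption of parse_pt_dir(); the directory path is
# a placeholder, and the printed keys are among those documented in the docstring above.
fs = parse_pt_dir('/data/pt-traces')
if fs is None:
    raise SystemExit('Failed to parse trace directory')
for record in fs:
    print(record['label'], record['base_dir'], record.get('trace_filepath'))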
    errors = True

if options.multi_gpu is not None and options.multi_gpu < 2:
    logger.log_error(module_name, 'Value for multi-GPU mode option must be at least 2')
    errors = True

if not options.preprocess and options.bin_dir is None:
    logger.log_error(module_name, 'Preprocessing (-p) is not set, so (--bin-dir) is required')
    errors = True

if options.bin_dir is not None and not path.isdir(options.bin_dir):
    logger.log_error(module_name, 'Binary directory (--bin-dir) must be a directory')
    errors = True

if options.checkpoint_interval == 0 and (options.checkpoint_best or options.checkpoint_es):
    logger.log_warning(module_name, 'Setting --checkpoint-best or --checkpoint-early-stop '
                                    'without --checkpoint does nothing')

if options.learn_ret:
    filters.add_filter('ret')
if options.learn_call:
    filters.add_filter('call')
if options.learn_icall:
    filters.add_filter('icall')
if options.learn_jmp:
    filters.add_filter('jmp')
if options.learn_ijmp:
    filters.add_filter('ijmp')