def test_all_started(self):
    ThreadList = multiscanner._start_module_threads(
        self.filelist,
        utils.parseDir(os.path.join(CWD, "modules")),
        self.config,
        self.global_module_interface)
    time.sleep(.001)
    for t in ThreadList:
        assert t.started
def _init(args):
    # Initialize configuration file
    if os.path.isfile(args.config):
        print('Warning:', args.config, 'already exists, overwriting will destroy changes')
        try:
            answer = input('Do you wish to overwrite the configuration file [y/N]:')
        except EOFError:
            answer = 'N'
        if answer == 'y':
            config_init(args.config)
        else:
            print('Checking for missing modules in configuration...')
            ModuleList = parseDir(MODULESDIR, recursive=True, exclude=["__init__"])
            config = configparser.SafeConfigParser()
            config.optionxform = str
            config.read(args.config)
            _write_missing_module_configs(ModuleList, config, filepath=args.config)
    else:
        config_init(args.config)

    # Init storage
    config = configparser.SafeConfigParser()
    config.optionxform = str
    config.read(args.config)
    config = _get_main_config(config)
    if os.path.isfile(config["storage-config"]):
        print('Warning:', config["storage-config"], 'already exists, overwriting will destroy changes')
        try:
            answer = input('Do you wish to overwrite the configuration file [y/N]:')
        except EOFError:
            answer = 'N'
        if answer == 'y':
            storage.config_init(config["storage-config"], overwrite=True)
            print('Storage configuration file initialized at', config["storage-config"])
        else:
            print('Checking for missing modules in storage configuration...')
            storage.config_init(config["storage-config"], overwrite=False)
    else:
        storage.config_init(config["storage-config"])
        print('Storage configuration file initialized at', config["storage-config"])

    exit(0)
def config_init(filepath, module_list=parseDir(MODULESDIR, recursive=True, exclude=["__init__"])):
    """
    Creates a new config file at filepath

    filepath - The config file to create
    """
    config = configparser.SafeConfigParser()
    config.optionxform = str
    if filepath:
        _rewrite_config(module_list, config, filepath)
    else:
        filepath = determine_configuration_path(filepath)
        _rewrite_config(module_list, config, filepath)
    print('Configuration file initialized at', filepath)
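A minimal usage sketch, assuming config_init is importable from the package root (the import path and target file below are illustrative assumptions, not project defaults):

# Illustrative only: import path and target path are assumptions.
from multiscanner import config_init

config_init('/tmp/multiscanner.ini')
# prints: Configuration file initialized at /tmp/multiscanner.ini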
def modules():
    '''
    Return a list of module names available for MultiScanner to use,
    and whether or not they are enabled in the config.
    '''
    files = utils.parseDir(MODULESDIR, True)
    filenames = [os.path.splitext(os.path.basename(f)) for f in files]
    module_names = [m[0] for m in filenames if m[1] == '.py']

    ms_config = configparser.SafeConfigParser()
    ms_config.optionxform = str
    ms_config.read(MS_CONFIG)
    modules = {}
    for module in module_names:
        try:
            modules[module] = ms_config.get(module, 'ENABLED')
        except (configparser.NoSectionError, configparser.NoOptionError):
            pass
    return jsonify({'Modules': modules})
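For reference, a hypothetical client call against this endpoint; the route path is an assumption, and the module names and values in the response depend entirely on the local config (ms_config.get() returns raw strings):

# Illustrative client call; the route '/api/v1/modules' is an assumption.
import requests

resp = requests.get('http://localhost:8080/api/v1/modules')
print(resp.json())
# e.g. {'Modules': {'MD5': 'True', 'SHA256': 'True', 'libmagic': 'False'}}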
def _get_storage_classes(dir_path=STORAGE_DIR):
    storage_classes = {}
    dir_list = utils.parseDir(dir_path, recursive=True)
    dir_list.remove(os.path.join(dir_path, 'storage.py'))
    # dir_list.remove(os.path.join(dir_path, '__init__.py'))
    dir_list.remove(os.path.join(dir_path, 'sql_driver.py'))
    for filename in dir_list:
        if filename.endswith('.py'):
            modname = os.path.basename(filename[:-3])
            moddir = os.path.dirname(filename)
            mod = utils.load_module(os.path.basename(modname), [moddir])
            if not mod:
                print(filename, 'not a valid storage module...')
                continue
            for member_name in dir(mod):
                member = getattr(mod, member_name)
                if inspect.isclass(member) and issubclass(member, Storage):
                    storage_classes[member_name] = member()
    if 'Storage' in storage_classes:
        del storage_classes['Storage']
    return storage_classes
from multiscanner.common import utils
from multiscanner.distributed import celery_worker
from multiscanner.storage.sql_driver import Database

try:
    from StringIO import StringIO as BytesIO
except ImportError:
    from io import BytesIO

CWD = os.path.dirname(os.path.abspath(__file__))
MS_WD = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Get a subset of simple modules to run in testing the celery worker
MODULE_LIST = utils.parseDir(multiscanner.MODULESDIR, recursive=True)
DESIRED_MODULES = [
    'entropy.py',
    'MD5.py',
    'SHA1.py',
    'SHA256.py',
    'libmagic.py',
    'ssdeep.py'
]
MODULES_TO_TEST = [i for e in DESIRED_MODULES for i in MODULE_LIST if e in i]

TEST_DB_PATH = os.path.join(CWD, 'testing.db')
if os.path.exists(TEST_DB_PATH):
    os.remove(TEST_DB_PATH)
DB_CONF = Database.DEFAULTCONF
DB_CONF['db_name'] = TEST_DB_PATH

# Metadata about test file
TEST_FULL_PATH = os.path.join(CWD, 'files/123.txt')
TEST_ORIGINAL_FILENAME = TEST_FULL_PATH.split('/')[-1]
TEST_TASK_ID = 1
def scan(filelist, conf=DEFAULTCONF):
    ruleDir = conf["ruledir"]
    extlist = conf["fileextensions"]
    includes = 'includes' in conf and conf['includes']

    ruleset = {}
    rules = parseDir(ruleDir, recursive=True)
    for r in rules:
        for ext in extlist:
            if r.endswith(ext):
                full_path = os.path.abspath(os.path.join(ruleDir, r))
                ruleset[full_path] = full_path
                break

    # Ran into a weird issue with file locking; this retry loop works around it
    goodtogo = False
    yararules = None
    while not goodtogo:
        try:
            yararules = yara.compile(filepaths=ruleset, includes=includes)
            goodtogo = True
        except yara.SyntaxError as e:
            # Drop the offending rule file and retry the compile
            bad_file = os.path.abspath(str(e).split('(')[0])
            if bad_file in ruleset:
                del ruleset[bad_file]
                print('WARNING: Yara', e)
            else:
                print('ERROR Yara: Invalid rule in', bad_file,
                      'but we are unable to remove it from our list. Aborting')
                print(e)
                return None

    matches = []
    for m in filelist:
        # Same file-locking workaround: retry the open up to 5 times
        goodtogo = False
        i = 0
        f = None
        while not goodtogo and i < 5:
            try:
                f = open(m, 'rb')
                goodtogo = True
            except Exception as e:
                print('yara:', e)
                time.sleep(3)
                i += 1
        if f is None:
            # Never managed to open the file; skip it rather than crash
            continue
        try:
            hit = yararules.match(data=f.read())
        except Exception as e:
            # TODO: log exception
            continue
        finally:
            f.close()
        if hit:
            hdict = {}
            for h in hit:
                if not set(h.tags).intersection(set(conf["ignore-tags"])):
                    hit_dict = {
                        'meta': h.meta,
                        'namespace': h.namespace,
                        'rule': h.rule,
                        'tags': h.tags,
                    }
                    try:
                        h_key = '{}:{}'.format(hit_dict['namespace'].split('/')[-1], hit_dict['rule'])
                    except IndexError:
                        h_key = '{}'.format(hit_dict['rule'])
                    hdict[h_key] = hit_dict
            matches.append((m, hdict))

    metadata = {}
    rulelist = sorted(ruleset)
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Rules"] = rulelist
    return (matches, metadata)
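For context, a minimal conf for this module, inferred purely from the keys scan() reads; the project's actual DEFAULTCONF may differ, and the rule directory path is illustrative:

# Inferred from the keys scan() reads; the real DEFAULTCONF may differ.
DEFAULTCONF = {
    'ruledir': '/opt/yara-rules',               # illustrative path
    'fileextensions': ['.yar', '.yara', '.sig'],
    'ignore-tags': ['TESTRULE'],
    'includes': False,
}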
def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG, config=None, module_list=None):
    """
    The meat and potatoes. Returns the list of module results

    Files - A list of files and dirs to be scanned
    recursive - If true it will search the dirs in Files recursively
    configregen - If True a new config file will be created overwriting the old
    configfile - What config file to use. Can be None.
    config - A dictionary containing the configuration options to be used.
    module_list - A list of file paths to be used as modules. Each string should end in .py
    """
    # Redirect stdout to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr
    # TODO: Make sure the cleanup from this works if something breaks

    # Init some vars
    # If recursive is False we don't parse the file list and take it as is.
    if recursive:
        filelist = parseFileList(Files, recursive=recursive)
    else:
        filelist = Files
    # A list of files in the module dir
    if module_list is None:
        module_list = parseDir(MODULESDIR, recursive=True, exclude=["__init__"])
    # A dictionary used for the copyfilesto parameter
    filedic = {}
    # What will be the config file object
    config_object = None

    # Read in config
    if configfile:
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        # Regen the config if needed or wanted
        if configregen or not os.path.isfile(configfile):
            _rewrite_config(module_list, config_object, filepath=configfile)
        config_object.read(configfile)
        main_config = _get_main_config(config_object, filepath=configfile)
        if config:
            file_conf = parse_config(config_object)
            for key in config:
                if key not in file_conf:
                    file_conf[key] = config[key]
                    file_conf[key]['_load_default'] = True
                else:
                    file_conf[key].update(config[key])
            config = file_conf
        else:
            config = parse_config(config_object)
    else:
        if config is None:
            config = {}
        else:
            config['_load_default'] = True
        if 'main' in config:
            main_config = config['main']
        else:
            main_config = DEFAULTCONF

    # If none of the files existed
    if not filelist:
        sys.stdout = stdout
        raise ValueError("No valid files")

    # Copy files to a share if configured
    if "copyfilesto" not in main_config:
        main_config["copyfilesto"] = False
    if main_config["copyfilesto"]:
        if os.path.isdir(main_config["copyfilesto"]):
            filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"])
        else:
            sys.stdout = stdout
            raise IOError('The copyfilesto dir "' + main_config["copyfilesto"] + '" is not a valid dir')

    # Create the global module interface
    global_module_interface = _GlobalModuleInterface()

    # Start a thread for each module
    thread_list = _start_module_threads(filelist, module_list, config, global_module_interface)

    # Write the default config settings for missing ones
    if config_object:
        _write_missing_module_configs(module_list, config_object, filepath=configfile)

    # Warn about spaces in file names
    for f in filelist:
        if ' ' in f:
            print('WARNING: You are using file paths with spaces. This may result in modules not reporting correctly.')
            break

    # Wait for all threads to finish
    thread_wait_list = thread_list[:]
    i = 0
    while thread_wait_list:
        i += 1
        for thread in thread_wait_list:
            if not thread.is_alive():
                i = 0
                thread_wait_list.remove(thread)
                if VERBOSE:
                    print(thread.name, "took", thread.endtime - thread.starttime)
        if i == 15:
            i = 0
            if VERBOSE:
                p = 'Waiting on'
                for thread in thread_wait_list:
                    p += ' ' + thread.name
                p += '...'
                print(p)
        time.sleep(1)

    # Delete copied files
    if main_config["copyfilesto"]:
        for item in filelist:
            try:
                os.remove(item)
            except OSError:
                pass

    # Get Result list
    results = []
    for thread in thread_list:
        if thread.ret is not None:
            results.append(thread.ret)
        del thread

    # Translate file names back to the originals
    if filedic:
        # I have no idea if this is the best way to do in-place modifications
        for i in range(0, len(results)):
            (result, metadata) = results[i]
            modded = False
            for j in range(0, len(result)):
                (filename, hit) = result[j]
                base = basename(filename)
                if base in filedic:
                    filename = filedic[base]
                    modded = True
                    result[j] = (filename, hit)
            if modded:
                results[i] = (result, metadata)

    # Scan subfiles if needed
    subscan_list = global_module_interface._get_subscan_list()
    if subscan_list:
        # Translate from_filename back to original if needed
        if filedic:
            for i in range(0, len(subscan_list)):
                file_path, from_filename, module_name = subscan_list[i]
                base = basename(from_filename)
                if base in filedic:
                    from_filename = filedic[base]
                    subscan_list[i] = (file_path, from_filename, module_name)
        results.extend(_subscan(subscan_list, config, main_config, module_list, global_module_interface))

    global_module_interface._cleanup()

    # Return stdout to previous state
    sys.stdout = stdout
    return results
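A short usage sketch, matching the call pattern in the test snippets below; the file path is a placeholder, and each result entry pairs a list of per-file hits with the module's metadata dict:

# Illustrative call; the path is a placeholder, not a project default.
import multiscanner

results = multiscanner.multiscan(['/tmp/sample.exe'], recursive=False)
for result, metadata in results:
    # metadata carries the module's Name/Type; result holds (filename, report) pairs
    print(metadata['Name'], result)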
def create_task():
    '''
    Create a new task for a submitted file. Save the submitted file to
    UPLOAD_FOLDER, optionally unzipping it. Return task id and 201 status.
    '''
    file_ = request.files['file']
    if request.form.get('upload_type', None) == 'import':
        try:
            task_id = import_task(file_)
        except KeyError:
            return make_response(
                jsonify({'Message': 'Cannot import report missing \'Scan Time\' field!'}),
                HTTP_BAD_REQUEST)
        except InvalidScanTimeFormatError:
            return make_response(
                jsonify({'Message': 'Cannot import report with \'Scan Time\' of invalid format!'}),
                HTTP_BAD_REQUEST)
        except (UnicodeDecodeError, ValueError):
            return make_response(
                jsonify({'Message': 'Cannot import non-JSON files!'}),
                HTTP_BAD_REQUEST)
        return make_response(jsonify({'Message': {'task_ids': [task_id]}}), HTTP_CREATED)

    original_filename = file_.filename
    metadata = {}
    task_id_list = []
    extract_dir = None
    rescan = False
    for key in request.form.keys():
        if key in ['file_id', 'archive-password', 'upload_type'] or request.form[key] == '':
            continue
        elif key == 'duplicate':
            if request.form[key] == 'latest':
                rescan = False
            elif request.form[key] == 'rescan':
                rescan = True
        elif key == 'modules':
            module_names = request.form[key]
            files = utils.parseDir(MODULESDIR, True)
            modules = []
            for f in files:
                split = os.path.splitext(os.path.basename(f))
                if split[0] in module_names and split[1] == '.py':
                    modules.append(f)
        elif key == 'archive-analyze' and request.form[key] == 'true':
            extract_dir = api_config['api']['upload_folder']
            if not os.path.isdir(extract_dir):
                return make_response(
                    jsonify({'Message': "'upload_folder' in API config is not a valid folder!"}),
                    HTTP_BAD_REQUEST)

            # Get password if present
            if 'archive-password' in request.form:
                password = request.form['archive-password']
                if PY3:
                    password = bytes(password, 'utf-8')
            else:
                password = ''
        else:
            metadata[key] = request.form[key]

    if extract_dir:
        # Extract a zip
        if zipfile.is_zipfile(file_):
            z = zipfile.ZipFile(file_)
            try:
                # NOTE: zipfile module prior to Py 2.7.4 is insecure!
                # https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.extract
                z.extractall(path=extract_dir, pwd=password)
                for uzfile in z.namelist():
                    unzipped_file = open(os.path.join(extract_dir, uzfile))
                    f_name, full_path = save_hashed_filename(unzipped_file, True)
                    tid = queue_task(uzfile, f_name, full_path, metadata, rescan=rescan)
                    task_id_list.append(tid)
            except RuntimeError as e:
                msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e)
                return make_response(jsonify({'Message': msg}), HTTP_BAD_REQUEST)
        # Extract a rar
        elif rarfile.is_rarfile(file_):
            r = rarfile.RarFile(file_)
            try:
                r.extractall(path=extract_dir, pwd=password)
                for urfile in r.namelist():
                    unrarred_file = open(os.path.join(extract_dir, urfile))
                    f_name, full_path = save_hashed_filename(unrarred_file, True)
                    tid = queue_task(urfile, f_name, full_path, metadata, rescan=rescan)
                    task_id_list.append(tid)
            except RuntimeError as e:
                msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e)
                return make_response(jsonify({'Message': msg}), HTTP_BAD_REQUEST)
    else:
        # File was not an archive to extract
        f_name, full_path = save_hashed_filename(file_)
        tid = queue_task(original_filename, f_name, full_path, metadata, rescan=rescan)
        task_id_list = [tid]

    msg = {'task_ids': task_id_list}
    return make_response(jsonify({'Message': msg}), HTTP_CREATED)
def setup_class(cls):
    cls.real_mod_dir = multiscanner.MODULESDIR
    multiscanner.MODULESDIR = os.path.join(CWD, "modules")
    cls.filelist = utils.parseDir(os.path.join(CWD, 'files'))
    cls.files = ['a', 'b', 'C:\\c', '/d/d']
    cls.threadDict = {}
def test_parseDir():
    path = os.path.abspath(os.path.join(MS_WD, 'tests', 'dir_test'))
    result = utils.parseDir(path, recursive=False)
    expected = [os.path.join(path, '1.1.txt'), os.path.join(path, '1.2.txt')]
    assert sorted(result) == sorted(expected)
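Since every snippet here revolves around utils.parseDir, a minimal sketch of a compatible implementation may help. This is inferred from the call sites above (positional recursive flag, full paths returned, exclude matched against basenames without extension), not the project's actual code:

import os

def parseDir(directory, recursive=False, exclude=None):
    """Return a list of file paths under directory.

    Sketch inferred from the call sites in this section, not the real
    implementation: recursive walks subdirectories, and exclude drops
    files whose basename (minus extension) matches an entry.
    """
    exclude = exclude or []
    found = []
    for entry in os.listdir(directory):
        full = os.path.join(directory, entry)
        if os.path.isdir(full):
            if recursive:
                found.extend(parseDir(full, recursive=recursive, exclude=exclude))
        elif os.path.splitext(entry)[0] not in exclude:
            found.append(full)
    return found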
def setup_class(cls):
    cls.real_mod_dir = multiscanner.MODULESDIR
    multiscanner.MODULESDIR = os.path.join(CWD, "modules")
    cls.filelist = utils.parseDir(os.path.join(CWD, 'files'))
    multiscanner.CONFIG = '.tmpfile.ini'
from __future__ import division, absolute_import, with_statement, print_function, unicode_literals

import os
import tempfile

import multiscanner
from multiscanner.common import utils

# Makes sure we use the multiscanner in ../
CWD = os.path.dirname(os.path.abspath(__file__))

module_list = [os.path.join(CWD, 'modules', 'test_conf.py')]
filelist = utils.parseDir(os.path.join(CWD, 'files'))


def test_no_config():
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=None,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}


def test_config_api_no_file():
    config = {'test_conf': {'a': 'z'}}
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=config,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}


def test_config_api_with_empty_file():
    config = {'test_conf': {'a': 'z'}}