def test_all_started(self):
    ThreadList = multiscanner._start_module_threads(
        self.filelist, common.parseDir(os.path.join(CWD, "modules")),
        self.config, self.global_module_interface)
    time.sleep(.001)
    for t in ThreadList:
        assert t.started
def test_all_started(self):
    ThreadList = multiscanner._start_module_threads(
        self.filelist, common.parseDir(os.path.join(CWD, "modules")),
        self.Config
    )
    time.sleep(0.001)
    for t in ThreadList:
        assert t.started
def fillDbFromDir(self):
    """ create database from directory """
    for f in parseDir(self.dir, excludeFunc=lambda x: not isMedia(x)):
        self.add2db(f)
    if self.db:
        self.saveDb()
def scan(filelist, conf=DEFAULTCONF):
    ruleDir = conf["ruledir"]
    extlist = conf["fileextensions"]

    ruleset = {}
    rules = parseDir(ruleDir, recursive=True)
    for r in rules:
        for ext in extlist:
            if r.endswith(ext):
                ruleset[r] = os.path.join(ruleDir, r)
                break

    # Ran into a weird issue with file locking, this fixes it
    goodtogo = False
    i = 0
    yararules = None
    while not goodtogo:
        try:
            yararules = yara.compile(filepaths=ruleset)
            goodtogo = True
        except yara.SyntaxError as e:
            bad_file = e.message.split('(')[0]
            del ruleset[bad_file]
            print(e)

    matches = []
    for m in filelist:
        # Ran into a weird issue with file locking, this fixes it
        goodtogo = False
        i = 0
        while not goodtogo and i < 5:
            try:
                f = open(m, 'rb')
                goodtogo = True
            except Exception as e:
                print('yara:', e)
                time.sleep(3)
                i += 1
        try:
            hit = yararules.match(data=f.read())
        except:
            continue
        finally:
            f.close()
        if hit:
            hlist = []
            for h in hit:
                if not set(h.tags).intersection(set(conf["ignore-tags"])):
                    hlist.append(str(h))
            hlist.sort()
            matches.append((m, hlist))

    metadata = {}
    rulelist = list(ruleset)
    rulelist.sort()
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Rules"] = rulelist
    return (matches, metadata)
def config_init(filepath, module_list=parseDir(MODULEDIR, recursive=True)): """ Creates a new config file at filepath filepath - The config file to create """ Config = configparser.SafeConfigParser() Config.optionxform = str _rewrite_config(module_list, Config, filepath)
def config_init(filepath, module_list=parseDir(MODULEDIR, recursive=True)): """ Creates a new config file at filepath filepath - The config file to create """ Config = configparser.SafeConfigParser() Config.optionxform = str _rewite_config(module_list, Config, filepath)
def config_init(filepath):
    """
    Creates a new config file at filepath

    filepath - The config file to create
    """
    Config = configparser.SafeConfigParser()
    Config.optionxform = str
    ModuleList = parseDir(MODULEDIR)
    _rewite_config(ModuleList, Config, filepath)
def _init(args):
    # Initialize configuration file
    if os.path.isfile(args.config):
        print('Warning:', args.config, 'already exists, overwriting will destroy changes')
        answer = raw_input(
            'Do you wish to overwrite the configuration file [y/N]:')
        if answer == 'y':
            config_init(args.config)
            print('Configuration file initialized at', args.config)
        else:
            print('Checking for missing modules in configuration...')
            ModuleList = parseDir(MODULEDIR, recursive=True)
            Config = configparser.SafeConfigParser()
            Config.optionxform = str
            Config.read(args.config)
            _write_missing_module_configs(ModuleList, Config, filepath=args.config)
    else:
        config_init(args.config)
        print('Configuration file initialized at', args.config)

    # Init storage
    Config = configparser.SafeConfigParser()
    Config.optionxform = str
    Config.read(args.config)
    config = _get_main_config(Config)
    if os.path.isfile(config["storage-config"]):
        print('Warning:', config["storage-config"], 'already exists, overwriting will destroy changes')
        answer = raw_input(
            'Do you wish to overwrite the configuration file [y/N]:')
        if answer == 'y':
            storage.config_init(config["storage-config"], overwrite=True)
            print('Storage configuration file initialized at', config["storage-config"])
        else:
            print('Checking for missing modules in storage configuration...')
            storage.config_init(config["storage-config"], overwrite=False)
    else:
        storage.config_init(config["storage-config"])
        print('Storage configuration file initialized at', config["storage-config"])
    exit(0)
def _get_storage_classes(dir_path=STORAGE_DIR):
    storage_classes = {}
    dir_list = common.parseDir(dir_path, recursive=True)
    dir_list.remove(os.path.join(dir_path, 'storage.py'))
    dir_list.remove(os.path.join(dir_path, '__init__.py'))
    for filename in dir_list:
        if filename.endswith('.py'):
            modname = os.path.basename(filename[:-3])
            moddir = os.path.dirname(filename)
            mod = common.load_module(os.path.basename(modname), [moddir])
            if not mod:
                print(filename, " not a valid module...")
                continue
            for member_name in dir(mod):
                member = getattr(mod, member_name)
                if inspect.isclass(member) and issubclass(member, Storage):
                    storage_classes[member_name] = member()
    return storage_classes
def _init(args):
    # Initialize configuration file
    if os.path.isfile(args.config):
        print('Warning:', args.config, 'already exists, overwriting will destroy changes')
        answer = raw_input('Do you wish to overwrite the configuration file [y/N]:')
        if answer == 'y':
            config_init(args.config)
            print('Configuration file initialized at', args.config)
        else:
            print('Checking for missing modules in configuration...')
            ModuleList = parseDir(MODULEDIR, recursive=True)
            Config = configparser.SafeConfigParser()
            Config.optionxform = str
            Config.read(args.config)
            _write_missing_module_configs(ModuleList, Config, filepath=args.config)
    else:
        config_init(args.config)
        print('Configuration file initialized at', args.config)
    exit(0)
def _init(args):
    # Initialize configuration file
    if os.path.isfile(args.config):
        print('Warning:', args.config, 'already exists, overwriting will destroy changes')
        answer = raw_input(
            'Do you wish to overwrite the configuration file [y/N]:')
        if answer == 'y':
            config_init(args.config)
            print('Configuration file initialized at', args.config)
        else:
            print('Checking for missing modules in configuration...')
            ModuleList = parseDir(MODULEDIR)
            Config = configparser.SafeConfigParser()
            Config.optionxform = str
            Config.read(args.config)
            _write_missing_module_configs(ModuleList, Config, filepath=args.config)
    else:
        config_init(args.config)
        print('Configuration file initialized at', args.config)
    exit(0)
def scan(filelist, conf=DEFAULTCONF):
    ruleDir = conf["ruledir"]
    extlist = conf["fileextensions"]
    includes = 'includes' in conf and conf['includes']

    ruleset = {}
    rules = parseDir(ruleDir, recursive=True)
    for r in rules:
        for ext in extlist:
            if r.endswith(ext):
                full_path = os.path.abspath(os.path.join(ruleDir, r))
                ruleset[full_path] = full_path
                break

    # Ran into a weird issue with file locking, this fixes it
    goodtogo = False
    i = 0
    yararules = None
    while not goodtogo:
        try:
            yararules = yara.compile(filepaths=ruleset, includes=includes)
            goodtogo = True
        except yara.SyntaxError as e:
            bad_file = os.path.abspath(str(e).split('(')[0])
            if bad_file in ruleset:
                del ruleset[bad_file]
                print('WARNING: Yara', e)
            else:
                print('ERROR Yara: Invalid rule in', bad_file,
                      'but we are unable to remove it from our list. Aborting')
                print(e)
                return None

    matches = []
    for m in filelist:
        # Ran into a weird issue with file locking, this fixes it
        goodtogo = False
        i = 0
        while not goodtogo and i < 5:
            try:
                f = open(m, 'rb')
                goodtogo = True
            except Exception as e:
                print('yara:', e)
                time.sleep(3)
                i += 1
        try:
            hit = yararules.match(data=f.read())
        except:
            continue
        finally:
            f.close()
        if hit:
            hlist = []
            for h in hit:
                if not set(h.tags).intersection(set(conf["ignore-tags"])):
                    hlist.append(str(h))
            hlist.sort()
            matches.append((m, hlist))

    metadata = {}
    rulelist = list(ruleset)
    rulelist.sort()
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Rules"] = rulelist
    return (matches, metadata)
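The scan() above reads only four keys from its conf argument (ruledir, fileextensions, includes, ignore-tags) and returns either None or a (matches, metadata) pair. The snippet below is only a usage sketch; the rule directory, extensions, tags, and sample path are placeholder values, not the module's actual DEFAULTCONF.

# Usage sketch for the yara scan() above -- all conf values and paths here are
# illustrative placeholders, not the module's real DEFAULTCONF.
example_conf = {
    "ruledir": "/opt/yara-rules",         # directory walked with parseDir()
    "fileextensions": [".yar", ".yara"],  # which files are treated as rules
    "includes": False,                    # forwarded to yara.compile()
    "ignore-tags": ["slow"],              # hits carrying these tags are dropped
}
result = scan(["/tmp/sample.bin"], conf=example_conf)
if result is not None:  # scan() returns None if a bad rule cannot be removed
    matches, metadata = result
    for filename, rule_hits in matches:
        print(filename, rule_hits)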
def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG,
              config=None, module_list=None):
    """
    The meat and potatoes. Returns the list of module results

    Files - A list of files and dirs to be scanned
    recursive - If true it will search the dirs in Files recursively
    configregen - If True a new config file will be created overwriting the old
    configfile - What config file to use. Can be None.
    config - A dictionary containing the configuration options to be used.
    """
    # Redirect stdout to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr
    # TODO: Make sure the cleanup from this works if something breaks
    # Init some vars
    # If recursive is None we don't parse the file list and take it as is.
    if recursive is not None:
        filelist = parseFileList(Files, recursive=recursive)
    else:
        filelist = Files
    # A list of files in the module dir
    if module_list is None:
        module_list = parseDir(MODULEDIR, recursive=True)
    # A dictionary used for the copyfileto parameter
    filedic = {}
    # What will be the config file object
    config_object = None

    # Read in config
    if configfile:
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        # Regen the config if needed or wanted
        if configregen or not os.path.isfile(configfile):
            _rewite_config(module_list, config_object, filepath=configfile)
        config_object.read(configfile)
        main_config = _get_main_config(config_object, filepath=configfile)
        if config:
            file_conf = parse_config(config_object)
            for key in config:
                if key not in file_conf:
                    file_conf[key] = config[key]
                    file_conf[key]['_load_default'] = True
                else:
                    file_conf[key].update(config[key])
            config = file_conf
        else:
            config = parse_config(config_object)
    else:
        if config is None:
            config = {}
        else:
            config['_load_default'] = True
        if 'main' in config:
            main_config = config['main']
        else:
            main_config = DEFAULTCONF

    # If none of the files existed
    if not filelist:
        sys.stdout = stdout
        raise ValueError("No valid files")

    # Copy files to a share if configured
    if "copyfilesto" not in main_config:
        main_config["copyfilesto"] = False
    if main_config["copyfilesto"]:
        if os.path.isdir(main_config["copyfilesto"]):
            filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"])
        else:
            sys.stdout = stdout
            raise IOError('The copyfilesto dir "' + main_config["copyfilesto"] + '" is not a valid dir')

    # Create the global module interface
    global_module_interface = _GlobalModuleInterface()

    # Start a thread for each module
    thread_list = _start_module_threads(filelist, module_list, config, global_module_interface)

    # Write the default configure settings for missing ones
    if config_object:
        _write_missing_module_configs(module_list, config_object, filepath=configfile)

    # Warn about spaces in file names
    for f in filelist:
        if ' ' in f:
            print('WARNING: You are using file paths with spaces. This may result in modules not reporting correctly.')
            break

    # Wait for all threads to finish
    thread_wait_list = thread_list[:]
    i = 0
    while thread_wait_list:
        i += 1
        for thread in thread_wait_list:
            if not thread.is_alive():
                i = 0
                thread_wait_list.remove(thread)
                if VERBOSE:
                    print(thread.name, "took", thread.endtime - thread.starttime)
        if i == 15:
            i = 0
            if VERBOSE:
                p = 'Waiting on'
                for thread in thread_wait_list:
                    p += ' ' + thread.name
                p += '...'
                print(p)
        time.sleep(1)

    # Delete copied files
    if main_config["copyfilesto"]:
        for item in filelist:
            os.remove(item)

    # Get Result list
    results = []
    for thread in thread_list:
        if thread.ret is not None:
            results.append(thread.ret)
        del thread

    # Translates file names back to the originals
    if filedic:
        # I have no idea if this is the best way to do in-place modifications
        for i in range(0, len(results)):
            (result, metadata) = results[i]
            modded = False
            for j in range(0, len(result)):
                (filename, hit) = result[j]
                base = basename(filename)
                if base in filedic:
                    filename = filedic[base]
                    modded = True
                    result[j] = (filename, hit)
            if modded:
                results[i] = (result, metadata)

    # Scan subfiles if needed
    subscan_list = global_module_interface._get_subscan_list()
    if subscan_list:
        # Translate from_filename back to original if needed
        if filedic:
            for i in range(0, len(subscan_list)):
                file_path, from_filename, module_name = subscan_list[i]
                base = basename(from_filename)
                if base in filedic:
                    from_filename = filedic[base]
                    subscan_list[i] = (file_path, from_filename, module_name)
        results.extend(_subscan(subscan_list, config, main_config, module_list, global_module_interface))

    global_module_interface._cleanup()

    # Return stdout to previous state
    sys.stdout = stdout

    return results
def setup_class(cls):
    cls.real_mod_dir = multiscanner.MODULEDIR
    multiscanner.MODULEDIR = os.path.join(CWD, "modules")
    cls.filelist = common.parseDir(os.path.join(CWD, "files"))
    cls.files = ["a", "b", "C:\\c", "/d/d"]
    cls.threadDict = {}
def setup_class(cls):
    cls.real_mod_dir = multiscanner.MODULEDIR
    multiscanner.MODULEDIR = os.path.join(CWD, "modules")
    cls.filelist = common.parseDir(os.path.join(CWD, 'files'))
    multiscanner.CONFIG = '.tmpfile.ini'
def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG,
              config=None, module_list=None):
    """
    The meat and potatoes. Returns the list of module results

    Files - A list of files and dirs to be scanned
    recursive - If true it will search the dirs in Files recursively
    configregen - If True a new config file will be created overwriting the old
    configfile - What config file to use. Can be None.
    config - A dictionary containing the configuration options to be used.
    """
    # Redirect stdout to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr
    # TODO: Make sure the cleanup from this works if something breaks
    # Init some vars
    # If recursive is None we don't parse the file list and take it as is.
    if recursive is not None:
        filelist = parseFileList(Files, recursive=recursive)
    else:
        filelist = Files
    # A list of files in the module dir
    if module_list is None:
        module_list = parseDir(MODULEDIR, recursive=True)
    # A dictionary used for the copyfileto parameter
    filedic = {}
    # What will be the config file object
    config_object = None

    # Read in config
    if configfile:
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        # Regen the config if needed or wanted
        if configregen or not os.path.isfile(configfile):
            _rewrite_config(module_list, config_object, filepath=configfile)
        config_object.read(configfile)
        main_config = _get_main_config(config_object, filepath=configfile)
        if config:
            file_conf = parse_config(config_object)
            for key in config:
                if key not in file_conf:
                    file_conf[key] = config[key]
                    file_conf[key]['_load_default'] = True
                else:
                    file_conf[key].update(config[key])
            config = file_conf
        else:
            config = parse_config(config_object)
    else:
        if config is None:
            config = {}
        else:
            config['_load_default'] = True
        if 'main' in config:
            main_config = config['main']
        else:
            main_config = DEFAULTCONF

    # If none of the files existed
    if not filelist:
        sys.stdout = stdout
        raise ValueError("No valid files")

    # Copy files to a share if configured
    if "copyfilesto" not in main_config:
        main_config["copyfilesto"] = False
    if main_config["copyfilesto"]:
        if os.path.isdir(main_config["copyfilesto"]):
            filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"])
        else:
            sys.stdout = stdout
            raise IOError('The copyfilesto dir "' + main_config["copyfilesto"] + '" is not a valid dir')

    # Create the global module interface
    global_module_interface = _GlobalModuleInterface()

    # Start a thread for each module
    thread_list = _start_module_threads(filelist, module_list, config, global_module_interface)

    # Write the default configure settings for missing ones
    if config_object:
        _write_missing_module_configs(module_list, config_object, filepath=configfile)

    # Warn about spaces in file names
    for f in filelist:
        if ' ' in f:
            print(
                'WARNING: You are using file paths with spaces. This may result in modules not reporting correctly.'
            )
            break

    # Wait for all threads to finish
    thread_wait_list = thread_list[:]
    i = 0
    while thread_wait_list:
        i += 1
        for thread in thread_wait_list:
            if not thread.is_alive():
                i = 0
                thread_wait_list.remove(thread)
                if VERBOSE:
                    print(thread.name, "took", thread.endtime - thread.starttime)
        if i == 15:
            i = 0
            if VERBOSE:
                p = 'Waiting on'
                for thread in thread_wait_list:
                    p += ' ' + thread.name
                p += '...'
                print(p)
        time.sleep(1)

    # Delete copied files
    if main_config["copyfilesto"]:
        for item in filelist:
            try:
                os.remove(item)
            except OSError:
                pass

    # Get Result list
    results = []
    for thread in thread_list:
        if thread.ret is not None:
            results.append(thread.ret)
        del thread

    # Translates file names back to the originals
    if filedic:
        # I have no idea if this is the best way to do in-place modifications
        for i in range(0, len(results)):
            (result, metadata) = results[i]
            modded = False
            for j in range(0, len(result)):
                (filename, hit) = result[j]
                base = basename(filename)
                if base in filedic:
                    filename = filedic[base]
                    modded = True
                    result[j] = (filename, hit)
            if modded:
                results[i] = (result, metadata)

    # Scan subfiles if needed
    subscan_list = global_module_interface._get_subscan_list()
    if subscan_list:
        # Translate from_filename back to original if needed
        if filedic:
            for i in range(0, len(subscan_list)):
                file_path, from_filename, module_name = subscan_list[i]
                base = basename(from_filename)
                if base in filedic:
                    from_filename = filedic[base]
                    subscan_list[i] = (file_path, from_filename, module_name)
        results.extend(
            _subscan(subscan_list, config, main_config, module_list, global_module_interface))

    global_module_interface._cleanup()

    # Return stdout to previous state
    sys.stdout = stdout

    return results
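For orientation, multiscan() as defined above returns a list of (result, metadata) pairs, one per module, where each result is a list of (filename, hits) tuples. The caller below is only a sketch: the sample paths are placeholders, and the metadata 'Name' lookup assumes modules populate that key, as the yara example above does.

# Usage sketch for multiscan(); the file paths are placeholders and the
# metadata 'Name' lookup assumes modules set that key (as the yara module does).
report = multiscan(['/tmp/sample1.bin', '/tmp/sample2.bin'],
                   recursive=False, configfile=CONFIG)
for result, metadata in report:
    print('Module:', metadata.get('Name', '<unnamed>'))
    for filename, hits in result:
        print(' ', filename, '->', hits)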
def test_parseDir():
    path = os.path.abspath(os.path.join(MS_WD, 'tests', 'dir_test'))
    result = common.parseDir(path, recursive=False)
    expected = [os.path.join(path, '1.1.txt'), os.path.join(path, '1.2.txt')]
    assert result == expected
def setup_class(cls):
    cls.real_mod_dir = multiscanner.MODULEDIR
    multiscanner.MODULEDIR = os.path.join(CWD, "modules")
    cls.filelist = common.parseDir(os.path.join(CWD, 'files'))
    cls.files = ['a', 'b', 'C:\\c', '/d/d']
    cls.threadDict = {}
# Allow import of celery_worker.py
if os.path.join(MS_WD, 'utils') not in sys.path:
    sys.path.insert(0, os.path.join(MS_WD, 'utils'))
if os.path.join(MS_WD, 'storage') not in sys.path:
    sys.path.insert(0, os.path.join(MS_WD, 'storage'))
if os.path.join(MS_WD, 'libs') not in sys.path:
    sys.path.append(os.path.join(MS_WD, 'libs'))
# Use multiscanner in ../
sys.path.insert(0, os.path.dirname(CWD))

# Get a subset of simple modules to run in testing
# the celery worker
MODULEDIR = os.path.join(MS_WD, "modules")
MODULE_LIST = common.parseDir(MODULEDIR, recursive=True)
DESIRED_MODULES = [
    'entropy.py', 'MD5.py', 'SHA1.py', 'SHA256.py', 'libmagic.py', 'ssdeep.py'
]
MODULES_TO_TEST = [i for e in DESIRED_MODULES for i in MODULE_LIST if e in i]

TEST_DB_PATH = os.path.join(CWD, 'testing.db')
if os.path.exists(TEST_DB_PATH):
    os.remove(TEST_DB_PATH)
DB_CONF = Database.DEFAULTCONF
DB_CONF['db_name'] = TEST_DB_PATH

# Metadata about test file
TEST_FULL_PATH = os.path.join(CWD, 'files/123.txt')
TEST_ORIGINAL_FILENAME = TEST_FULL_PATH.split('/')[-1]
TEST_TASK_ID = 1
def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG):
    """
    The meat and potatoes. Returns the list of module results

    Files - A list of files and dirs to be scanned
    recursive - If true it will search the dirs in Files recursively
    configregen - If True a new config file will be created overwriting the old
    configfile - What config file to use
    """
    # Redirect stdout to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr
    # TODO: Make sure the cleanup from this works if something breaks
    # Init some vars
    # If recursive is None we don't parse the file list and take it as is.
    if recursive is not None:
        filelist = parseFileList(Files, recursive=recursive)
    else:
        filelist = Files
    # A list of files in the module dir
    # TODO: This should just be a list of .py's that is passed
    ModuleList = parseDir(MODULEDIR)
    # A dictionary used for the copyfileto parameter
    filedic = {}

    # Read in config file
    Config = configparser.SafeConfigParser()
    Config.optionxform = str
    # Regen the config if needed or wanted
    if configregen or not os.path.isfile(configfile):
        _rewite_config(ModuleList, Config, filepath=configfile)
    Config.read(configfile)
    config = _get_main_config(Config, filepath=configfile)

    # If none of the files existed
    if not filelist:
        sys.stdout = stdout
        raise ValueError("No valid files")

    # Copy files to a share if configured
    if "copyfilesto" not in config:
        config["copyfilesto"] = False
    if config["copyfilesto"]:
        if os.path.isdir(config["copyfilesto"]):
            filelist = _copy_to_share(filelist, filedic, config["copyfilesto"])
        else:
            sys.stdout = stdout
            raise IOError('The copyfilesto dir "' + config["copyfilesto"] + '" is not a valid dir')

    # Start a thread for each module
    ThreadList = _start_module_threads(filelist, ModuleList, Config)

    # Write the default configure settings for missing ones
    _write_missing_module_configs(ModuleList, Config, filepath=configfile)

    # Wait for all threads to finish
    for thread in ThreadList:
        thread.join()

    if VERBOSE:
        for thread in ThreadList:
            print(thread.name, "took", thread.endtime - thread.starttime)

    # Delete copied files
    if config["copyfilesto"]:
        for item in filelist:
            os.remove(item)

    # Get Result list
    results = []
    for thread in ThreadList:
        if thread.ret is not None:
            results.append(thread.ret)
        del thread

    # Translates file names back to the originals
    if filedic:
        # I have no idea if this is the best way to do in-place modifications
        for i in range(0, len(results)):
            (result, metadata) = results[i]
            modded = False
            for j in range(0, len(result)):
                (filename, hit) = result[j]
                # This is ugly but os.path.basename is os dependent
                base = filename.split("\\")[-1].split("/")[-1]
                if base in filedic:
                    filename = filedic[base]
                    modded = True
                    result[j] = (filename, hit)
            if modded:
                results[i] = (result, metadata)

    # Return stdout to previous state
    sys.stdout = stdout

    return results
def test_parseDir():
    path = os.path.abspath(os.path.join(MS_WD, 'tests', 'dir_test'))
    result = common.parseDir(path, recursive=False)
    expected = [os.path.join(path, '1.1.txt'), os.path.join(path, '1.2.txt')]
    assert sorted(result) == sorted(expected)
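The two test_parseDir variants above pin down what common.parseDir is expected to return: the full paths of the files in a directory, with recursion and an excludeFunc filter available as options (see fillDbFromDir earlier). The helper below is only a minimal sketch of that behavior under those assumptions, not MultiScanner's implementation; because ordering is not guaranteed, the second variant compares sorted lists.

# Minimal sketch of a parseDir-like helper, inferred from the calls above;
# parsedir_sketch is a hypothetical name, not part of MultiScanner.
import os


def parsedir_sketch(directory, recursive=False, excludeFunc=None):
    """Return full paths of files in directory, optionally recursing and
    skipping any path for which excludeFunc(path) is True."""
    found = []
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if excludeFunc is not None and excludeFunc(full_path):
            continue
        if os.path.isdir(full_path):
            if recursive:
                found.extend(parsedir_sketch(full_path, recursive=True,
                                             excludeFunc=excludeFunc))
        else:
            found.append(full_path)
    return found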
from __future__ import division, absolute_import, with_statement, print_function, unicode_literals
import os
import sys
import tempfile

# Makes sure we use the multiscanner in ../
CWD = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(CWD))
import multiscanner
sys.path.append(os.path.join(CWD, '..', 'libs'))
import common

multiscanner.MODULEDIR = os.path.join(CWD, "modules")

module_list = [os.path.join(CWD, 'modules', 'test_conf.py')]
filelist = common.parseDir(os.path.join(CWD, 'files'))


def test_no_config():
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=None,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}


def test_config_api_no_file():
    config = {'test_conf': {'a': 'z'}}
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=config,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}


def test_config_api_with_empty_file():
    config = {'test_conf': {'a': 'z'}}
    config_file = tempfile.mkstemp()[1]
    results, metadata = multiscanner.multiscan(
        filelist, configfile=config_file, config=config,
        recursive=None, module_list=module_list)[0]
    os.remove(config_file)
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}