Example #1
 def test_all_started(self):
     ThreadList = multiscanner._start_module_threads(
         self.filelist, utils.parseDir(os.path.join(CWD, "modules")),
         self.config, self.global_module_interface)
     time.sleep(.001)
     for t in ThreadList:
         assert t.started
Example #2
File: ms.py Project: yehias/multiscanner
def _init(args):
    # Initialize configuration file
    if os.path.isfile(args.config):
        print('Warning:', args.config,
              'already exists, overwriting will destroy changes')
        try:
            answer = input(
                'Do you wish to overwrite the configuration file [y/N]:')
        except EOFError:
            answer = 'N'
        if answer == 'y':
            config_init(args.config)
        else:
            print('Checking for missing modules in configuration...')
            ModuleList = parseDir(MODULESDIR,
                                  recursive=True,
                                  exclude=["__init__"])
            config = configparser.SafeConfigParser()
            config.optionxform = str
            config.read(args.config)
            _write_missing_module_configs(ModuleList,
                                          config,
                                          filepath=args.config)
    else:
        config_init(args.config)

    # Init storage
    config = configparser.SafeConfigParser()
    config.optionxform = str
    config.read(args.config)
    config = _get_main_config(config)
    if os.path.isfile(config["storage-config"]):
        print('Warning:', config["storage-config"],
              'already exists, overwriting will destroy changes')
        try:
            answer = input(
                'Do you wish to overwrite the configuration file [y/N]:')
        except EOFError:
            answer = 'N'
        if answer == 'y':
            storage.config_init(config["storage-config"], overwrite=True)
            print('Storage configuration file initialized at',
                  config["storage-config"])
        else:
            print('Checking for missing modules in storage configuration...')
            storage.config_init(config["storage-config"], overwrite=False)
    else:
        storage.config_init(config["storage-config"])
        print('Storage configuration file initialized at',
              config["storage-config"])

    exit(0)
Example #3
def config_init(filepath, module_list=parseDir(MODULESDIR, recursive=True, exclude=["__init__"])):
    """
    Creates a new config file at filepath

    filepath - The config file to create
    """
    config = configparser.SafeConfigParser()
    config.optionxform = str

    if filepath:
        _rewrite_config(module_list, config, filepath)
    else:
        filepath = determine_configuration_path(filepath)
        _rewrite_config(module_list, config, filepath)
    print('Configuration file initialized at', filepath)
Example #4
File: ms.py Project: yehias/multiscanner
def config_init(filepath,
                module_list=parseDir(MODULESDIR,
                                     recursive=True,
                                     exclude=["__init__"])):
    """
    Creates a new config file at filepath

    filepath - The config file to create
    """
    config = configparser.SafeConfigParser()
    config.optionxform = str

    if filepath:
        _rewrite_config(module_list, config, filepath)
    else:
        filepath = determine_configuration_path(filepath)
        _rewrite_config(module_list, config, filepath)
    print('Configuration file initialized at', filepath)
Example #5
def modules():
    '''
    Return a list of module names available for MultiScanner to use,
    and whether or not they are enabled in the config.
    '''
    files = utils.parseDir(MODULESDIR, True)
    filenames = [os.path.splitext(os.path.basename(f)) for f in files]
    module_names = [m[0] for m in filenames if m[1] == '.py']

    ms_config = configparser.SafeConfigParser()
    ms_config.optionxform = str
    ms_config.read(MS_CONFIG)
    modules = {}
    for module in module_names:
        try:
            modules[module] = ms_config.get(module, 'ENABLED')
        except (configparser.NoSectionError, configparser.NoOptionError):
            pass
    return jsonify({'Modules': modules})
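Based on the handler above, the endpoint returns a JSON object mapping each discovered module name to the string value of its ENABLED option from the config. A hypothetical response (module names and values here are illustrative only, borrowed from the test module list in Example #10) could look like:

{
    "Modules": {
        "MD5": "True",
        "SHA256": "True",
        "entropy": "False"
    }
}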
Example #6
def modules():
    '''
    Return a list of module names available for MultiScanner to use,
    and whether or not they are enabled in the config.
    '''
    files = utils.parseDir(MODULESDIR, True)
    filenames = [os.path.splitext(os.path.basename(f)) for f in files]
    module_names = [m[0] for m in filenames if m[1] == '.py']

    ms_config = configparser.SafeConfigParser()
    ms_config.optionxform = str
    ms_config.read(MS_CONFIG)
    modules = {}
    for module in module_names:
        try:
            modules[module] = ms_config.get(module, 'ENABLED')
        except (configparser.NoSectionError, configparser.NoOptionError):
            pass
    return jsonify({'Modules': modules})
Example #7
def _init(args):
    # Initialize configuration file
    if os.path.isfile(args.config):
        print('Warning:', args.config, 'already exists, overwriting will destroy changes')
        try:
            answer = input('Do you wish to overwrite the configuration file [y/N]:')
        except EOFError:
            answer = 'N'
        if answer == 'y':
            config_init(args.config)
        else:
            print('Checking for missing modules in configuration...')
            ModuleList = parseDir(MODULESDIR, recursive=True, exclude=["__init__"])
            config = configparser.SafeConfigParser()
            config.optionxform = str
            config.read(args.config)
            _write_missing_module_configs(ModuleList, config, filepath=args.config)
    else:
        config_init(args.config)

    # Init storage
    config = configparser.SafeConfigParser()
    config.optionxform = str
    config.read(args.config)
    config = _get_main_config(config)
    if os.path.isfile(config["storage-config"]):
        print('Warning:', config["storage-config"], 'already exists, overwriting will destroy changes')
        try:
            answer = input('Do you wish to overwrite the configuration file [y/N]:')
        except EOFError:
            answer = 'N'
        if answer == 'y':
            storage.config_init(config["storage-config"], overwrite=True)
            print('Storage configuration file initialized at', config["storage-config"])
        else:
            print('Checking for missing modules in storage configuration...')
            storage.config_init(config["storage-config"], overwrite=False)
    else:
        storage.config_init(config["storage-config"])
        print('Storage configuration file initialized at', config["storage-config"])

    exit(0)
Example #8
def _get_storage_classes(dir_path=STORAGE_DIR):
    storage_classes = {}
    dir_list = utils.parseDir(dir_path, recursive=True)
    dir_list.remove(os.path.join(dir_path, 'storage.py'))
    # dir_list.remove(os.path.join(dir_path, '__init__.py'))
    dir_list.remove(os.path.join(dir_path, 'sql_driver.py'))
    for filename in dir_list:
        if filename.endswith('.py'):
            modname = os.path.basename(filename[:-3])
            moddir = os.path.dirname(filename)
            mod = utils.load_module(os.path.basename(modname), [moddir])
            if not mod:
                print(filename, 'not a valid storage module...')
                continue
            for member_name in dir(mod):
                member = getattr(mod, member_name)
                if inspect.isclass(member) and issubclass(member, Storage):
                    storage_classes[member_name] = member()
    if 'Storage' in storage_classes:
        del storage_classes['Storage']
    return storage_classes
Example #9
from __future__ import division, absolute_import, with_statement, print_function, unicode_literals
import os
import tempfile

import multiscanner
from multiscanner.common import utils

# Makes sure we use the multiscanner in ../
CWD = os.path.dirname(os.path.abspath(__file__))

module_list = [os.path.join(CWD, 'modules', 'test_conf.py')]
filelist = utils.parseDir(os.path.join(CWD, 'files'))


def test_no_config():
    results, metadata = multiscanner.multiscan(filelist,
                                               configfile=None,
                                               config=None,
                                               recursive=None,
                                               module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}


def test_config_api_no_file():
    config = {'test_conf': {'a': 'z'}}
    results, metadata = multiscanner.multiscan(filelist,
                                               configfile=None,
                                               config=config,
                                               recursive=None,
                                               module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}
Example #10
from multiscanner.common import utils
from multiscanner.distributed import celery_worker
from multiscanner.storage.sql_driver import Database

try:
    from StringIO import StringIO as BytesIO
except ImportError:
    from io import BytesIO

CWD = os.path.dirname(os.path.abspath(__file__))
MS_WD = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Get a subset of simple modules to run in testing
# the celery worker
MODULE_LIST = utils.parseDir(multiscanner.MODULESDIR, recursive=True)
DESIRED_MODULES = [
    'entropy.py', 'MD5.py', 'SHA1.py', 'SHA256.py', 'libmagic.py', 'ssdeep.py'
]
MODULES_TO_TEST = [i for e in DESIRED_MODULES for i in MODULE_LIST if e in i]

TEST_DB_PATH = os.path.join(CWD, 'testing.db')
if os.path.exists(TEST_DB_PATH):
    os.remove(TEST_DB_PATH)
DB_CONF = Database.DEFAULTCONF
DB_CONF['db_name'] = TEST_DB_PATH

# Metadata about test file
TEST_FULL_PATH = os.path.join(CWD, 'files/123.txt')
TEST_ORIGINAL_FILENAME = TEST_FULL_PATH.split('/')[-1]
TEST_TASK_ID = 1
Example #11
def scan(filelist, conf=DEFAULTCONF):
    ruleDir = conf["ruledir"]
    extlist = conf["fileextensions"]
    includes = 'includes' in conf and conf['includes']

    ruleset = {}
    rules = parseDir(ruleDir, recursive=True)
    for r in rules:
        for ext in extlist:
            if r.endswith(ext):
                full_path = os.path.abspath(os.path.join(ruleDir, r))
                ruleset[full_path] = full_path
                break

    # Ran into a weird issue with file locking, this fixes it
    goodtogo = False
    i = 0
    yararules = None
    while not goodtogo:
        try:
            yararules = yara.compile(filepaths=ruleset, includes=includes)
            goodtogo = True
        except yara.SyntaxError as e:
            bad_file = os.path.abspath(str(e).split('(')[0])
            if bad_file in ruleset:
                del ruleset[bad_file]
                print('WARNING: Yara', e)
            else:
                print('ERROR Yara: Invalid rule in', bad_file, 'but we are unable to remove it from our list. Aborting')
                print(e)
                return None

    matches = []
    for m in filelist:
        # Ran into a weird issue with file locking, this fixes it
        goodtogo = False
        i = 0
        while not goodtogo and i < 5:
            try:
                f = open(m, 'rb')
                goodtogo = True
            except Exception as e:
                print('yara:', e)
                time.sleep(3)
                i += 1
        try:
            hit = yararules.match(data=f.read())
        except Exception as e:
            # TODO: log exception
            continue
        finally:
            f.close()
        if hit:
            hdict = {}
            for h in hit:
                if not set(h.tags).intersection(set(conf["ignore-tags"])):
                    hit_dict = {
                        'meta': h.meta,
                        'namespace': h.namespace,
                        'rule': h.rule,
                        'tags': h.tags,
                    }
                    try:
                        h_key = '{}:{}'.format(hit_dict['namespace'].split('/')[-1], hit_dict['rule'])
                    except IndexError:
                        h_key = '{}'.format(hit_dict['rule'])
                    hdict[h_key] = hit_dict
            matches.append((m, hdict))

    metadata = {}
    rulelist = list(ruleset)
    rulelist.sort()
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Rules"] = rulelist
    return (matches, metadata)
Example #12
File: ms.py Project: yehias/multiscanner
def multiscan(Files,
              recursive=False,
              configregen=False,
              configfile=CONFIG,
              config=None,
              module_list=None):
    """
    The meat and potatoes. Returns the list of module results

    Files - A list of files and dirs to be scanned
    recursive - If true it will search the dirs in Files recursively
    configregen - If True a new config file will be created overwriting the old
    configfile - What config file to use. Can be None.
    config - A dictionary containing the configuration options to be used.
    module_list - A list of file paths to be used as modules. Each string should end in .py
    """
    # Redirect stdout to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr
    # TODO: Make sure the cleanup from this works if something breaks

    # Init some vars
    # If recursive is False we don't parse the file list and take it as is.
    if recursive:
        filelist = parseFileList(Files, recursive=recursive)
    else:
        filelist = Files
    # A list of files in the module dir
    if module_list is None:
        module_list = parseDir(MODULESDIR,
                               recursive=True,
                               exclude=["__init__"])
    # A dictionary used for the copyfileto parameter
    filedic = {}
    # What will be the config file object
    config_object = None

    # Read in config
    if configfile:
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        # Regen the config if needed or wanted
        if configregen or not os.path.isfile(configfile):
            _rewrite_config(module_list, config_object, filepath=configfile)

        config_object.read(configfile)
        main_config = _get_main_config(config_object, filepath=configfile)
        if config:
            file_conf = parse_config(config_object)
            for key in config:
                if key not in file_conf:
                    file_conf[key] = config[key]
                    file_conf[key]['_load_default'] = True
                else:
                    file_conf[key].update(config[key])
            config = file_conf
        else:
            config = parse_config(config_object)
    else:
        if config is None:
            config = {}
        else:
            config['_load_default'] = True
        if 'main' in config:
            main_config = config['main']
        else:
            main_config = DEFAULTCONF

    # If none of the files existed
    if not filelist:
        sys.stdout = stdout
        raise ValueError("No valid files")

    # Copy files to a share if configured
    if "copyfilesto" not in main_config:
        main_config["copyfilesto"] = False
    if main_config["copyfilesto"]:
        if os.path.isdir(main_config["copyfilesto"]):
            filelist = _copy_to_share(filelist, filedic,
                                      main_config["copyfilesto"])
        else:
            sys.stdout = stdout
            raise IOError('The copyfilesto dir "' +
                          main_config["copyfilesto"] + '" is not a valid dir')

    # Create the global module interface
    global_module_interface = _GlobalModuleInterface()

    # Start a thread for each module
    thread_list = _start_module_threads(filelist, module_list, config,
                                        global_module_interface)

    # Write the default config settings for any missing modules
    if config_object:
        _write_missing_module_configs(module_list,
                                      config_object,
                                      filepath=configfile)

    # Warn about spaces in file names
    for f in filelist:
        if ' ' in f:
            print(
                'WARNING: You are using file paths with spaces. This may result in modules not reporting correctly.'
            )
            break

    # Wait for all threads to finish
    thread_wait_list = thread_list[:]
    i = 0
    while thread_wait_list:
        i += 1
        for thread in thread_wait_list:
            if not thread.is_alive():
                i = 0
                thread_wait_list.remove(thread)
                if VERBOSE:
                    print(thread.name, "took",
                          thread.endtime - thread.starttime)
        if i == 15:
            i = 0
            if VERBOSE:
                p = 'Waiting on'
                for thread in thread_wait_list:
                    p += ' ' + thread.name
                p += '...'
                print(p)
        time.sleep(1)

    # Delete copied files
    if main_config["copyfilesto"]:
        for item in filelist:
            try:
                os.remove(item)
            except OSError:
                pass

    # Get Result list
    results = []
    for thread in thread_list:
        if thread.ret is not None:
            results.append(thread.ret)
        del thread

    # Translates file names back to the originals
    if filedic:
        # I have no idea if this is the best way to do in-place modifications
        for i in range(0, len(results)):
            (result, metadata) = results[i]
            modded = False
            for j in range(0, len(result)):
                (filename, hit) = result[j]
                base = basename(filename)
                if base in filedic:
                    filename = filedic[base]
                    modded = True
                    result[j] = (filename, hit)
            if modded:
                results[i] = (result, metadata)

    # Scan subfiles if needed
    subscan_list = global_module_interface._get_subscan_list()
    if subscan_list:
        # Translate from_filename back to original if needed
        if filedic:
            for i in range(0, len(subscan_list)):
                file_path, from_filename, module_name = subscan_list[i]
                base = basename(from_filename)
                if base in filedic:
                    from_filename = filedic[base]
                    subscan_list[i] = (file_path, from_filename, module_name)

        results.extend(
            _subscan(subscan_list, config, main_config, module_list,
                     global_module_interface))

    global_module_interface._cleanup()

    # Return stdout to previous state
    sys.stdout = stdout
    return results
Example #13
def create_task():
    '''
    Create a new task for a submitted file. Save the submitted file to
    UPLOAD_FOLDER, optionally unzipping it. Return task id and 201 status.
    '''
    file_ = request.files['file']
    if request.form.get('upload_type', None) == 'import':
        try:
            task_id = import_task(file_)
        except KeyError:
            return make_response(
                jsonify({
                    'Message':
                    'Cannot import report missing \'Scan Time\' field!'
                }), HTTP_BAD_REQUEST)
        except InvalidScanTimeFormatError:
            return make_response(
                jsonify({
                    'Message':
                    'Cannot import report with \'Scan Time\' of invalid format!'
                }), HTTP_BAD_REQUEST)
        except (UnicodeDecodeError, ValueError):
            return make_response(
                jsonify({'Message': 'Cannot import non-JSON files!'}),
                HTTP_BAD_REQUEST)

        return make_response(jsonify({'Message': {
            'task_ids': [task_id]
        }}), HTTP_CREATED)

    original_filename = file_.filename

    metadata = {}
    task_id_list = []
    extract_dir = None
    rescan = False
    for key in request.form.keys():
        if key in ['file_id', 'archive-password', 'upload_type'
                   ] or request.form[key] == '':
            continue
        elif key == 'duplicate':
            if request.form[key] == 'latest':
                rescan = False
            elif request.form[key] == 'rescan':
                rescan = True
        elif key == 'modules':
            module_names = request.form[key]
            files = utils.parseDir(MODULESDIR, True)
            modules = []
            for f in files:
                split = os.path.splitext(os.path.basename(f))
                if split[0] in module_names and split[1] == '.py':
                    modules.append(f)
        elif key == 'archive-analyze' and request.form[key] == 'true':
            extract_dir = api_config['api']['upload_folder']
            if not os.path.isdir(extract_dir):
                return make_response(
                    jsonify({
                        'Message':
                        "'upload_folder' in API config is not "
                        "a valid folder!"
                    }), HTTP_BAD_REQUEST)

            # Get password if present
            if 'archive-password' in request.form:
                password = request.form['archive-password']
                if PY3:
                    password = bytes(password, 'utf-8')
            else:
                password = ''
        else:
            metadata[key] = request.form[key]

    if extract_dir:
        # Extract a zip
        if zipfile.is_zipfile(file_):
            z = zipfile.ZipFile(file_)
            try:
                # NOTE: zipfile module prior to Py 2.7.4 is insecure!
                # https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.extract
                z.extractall(path=extract_dir, pwd=password)
                for uzfile in z.namelist():
                    unzipped_file = open(os.path.join(extract_dir, uzfile))
                    f_name, full_path = save_hashed_filename(
                        unzipped_file, True)
                    tid = queue_task(uzfile,
                                     f_name,
                                     full_path,
                                     metadata,
                                     rescan=rescan)
                    task_id_list.append(tid)
            except RuntimeError as e:
                msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e)
                return make_response(jsonify({'Message': msg}),
                                     HTTP_BAD_REQUEST)
        # Extract a rar
        elif rarfile.is_rarfile(file_):
            r = rarfile.RarFile(file_)
            try:
                r.extractall(path=extract_dir, pwd=password)
                for urfile in r.namelist():
                    unrarred_file = open(os.path.join(extract_dir, urfile))
                    f_name, full_path = save_hashed_filename(
                        unrarred_file, True)
                    tid = queue_task(urfile,
                                     f_name,
                                     full_path,
                                     metadata,
                                     rescan=rescan)
                    task_id_list.append(tid)
            except RuntimeError as e:
                msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e)
                return make_response(jsonify({'Message': msg}),
                                     HTTP_BAD_REQUEST)
    else:
        # File was not an archive to extract
        f_name, full_path = save_hashed_filename(file_)
        tid = queue_task(original_filename,
                         f_name,
                         full_path,
                         metadata,
                         rescan=rescan)
        task_id_list = [tid]

    msg = {'task_ids': task_id_list}
    return make_response(jsonify({'Message': msg}), HTTP_CREATED)
Example #14
 def test_all_started(self):
     ThreadList = multiscanner._start_module_threads(
         self.filelist, utils.parseDir(os.path.join(CWD, "modules")), self.config, self.global_module_interface)
     time.sleep(.001)
     for t in ThreadList:
         assert t.started
Example #15
 def setup_class(cls):
     cls.real_mod_dir = multiscanner.MODULESDIR
     multiscanner.MODULESDIR = os.path.join(CWD, "modules")
     cls.filelist = utils.parseDir(os.path.join(CWD, 'files'))
     cls.files = ['a', 'b', 'C:\\c', '/d/d']
     cls.threadDict = {}
Example #16
def test_parseDir():
    path = os.path.abspath(os.path.join(MS_WD, 'tests', 'dir_test'))
    result = utils.parseDir(path, recursive=False)
    expected = [os.path.join(path, '1.1.txt'), os.path.join(path, '1.2.txt')]
    assert sorted(result) == sorted(expected)
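The call sites in these examples use parseDir(path), parseDir(path, True), and parseDir(path, recursive=True, exclude=["__init__"]), and the test above expects a list of full file paths. As a rough sketch only, inferred from those call sites and not the project's actual implementation, an equivalent helper could look like this:

import os


def parse_dir_sketch(directory, recursive=False, exclude=None):
    # Illustrative stand-in for utils.parseDir: collect full paths of files
    # under `directory`, optionally recursing into subdirectories, and skip
    # any file whose basename (without extension) appears in `exclude`.
    exclude = exclude or []
    found = []
    if recursive:
        for root, _dirs, files in os.walk(directory):
            for name in files:
                if os.path.splitext(name)[0] not in exclude:
                    found.append(os.path.join(root, name))
    else:
        for name in os.listdir(directory):
            full_path = os.path.join(directory, name)
            if os.path.isfile(full_path) and os.path.splitext(name)[0] not in exclude:
                found.append(full_path)
    return found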
Example #17
def create_task():
    '''
    Create a new task for a submitted file. Save the submitted file to
    UPLOAD_FOLDER, optionally unzipping it. Return task id and 201 status.
    '''
    file_ = request.files['file']
    if request.form.get('upload_type', None) == 'import':
        try:
            task_id = import_task(file_)
        except KeyError:
            return make_response(
                jsonify({'Message': 'Cannot import report missing \'Scan Time\' field!'}),
                HTTP_BAD_REQUEST)
        except InvalidScanTimeFormatError:
            return make_response(
                jsonify({'Message': 'Cannot import report with \'Scan Time\' of invalid format!'}),
                HTTP_BAD_REQUEST)
        except (UnicodeDecodeError, ValueError):
            return make_response(
                jsonify({'Message': 'Cannot import non-JSON files!'}),
                HTTP_BAD_REQUEST)

        return make_response(
            jsonify({'Message': {'task_ids': [task_id]}}),
            HTTP_CREATED
        )

    original_filename = file_.filename

    metadata = {}
    task_id_list = []
    extract_dir = None
    rescan = False
    for key in request.form.keys():
        if key in ['file_id', 'archive-password', 'upload_type'] or request.form[key] == '':
            continue
        elif key == 'duplicate':
            if request.form[key] == 'latest':
                rescan = False
            elif request.form[key] == 'rescan':
                rescan = True
        elif key == 'modules':
            module_names = request.form[key]
            files = utils.parseDir(MODULESDIR, True)
            modules = []
            for f in files:
                split = os.path.splitext(os.path.basename(f))
                if split[0] in module_names and split[1] == '.py':
                    modules.append(f)
        elif key == 'archive-analyze' and request.form[key] == 'true':
            extract_dir = api_config['api']['upload_folder']
            if not os.path.isdir(extract_dir):
                return make_response(
                    jsonify({'Message': "'upload_folder' in API config is not "
                             "a valid folder!"}),
                    HTTP_BAD_REQUEST)

            # Get password if present
            if 'archive-password' in request.form:
                password = request.form['archive-password']
                if PY3:
                    password = bytes(password, 'utf-8')
            else:
                password = ''
        else:
            metadata[key] = request.form[key]

    if extract_dir:
        # Extract a zip
        if zipfile.is_zipfile(file_):
            z = zipfile.ZipFile(file_)
            try:
                # NOTE: zipfile module prior to Py 2.7.4 is insecure!
                # https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.extract
                z.extractall(path=extract_dir, pwd=password)
                for uzfile in z.namelist():
                    unzipped_file = open(os.path.join(extract_dir, uzfile))
                    f_name, full_path = save_hashed_filename(unzipped_file, True)
                    tid = queue_task(uzfile, f_name, full_path, metadata, rescan=rescan)
                    task_id_list.append(tid)
            except RuntimeError as e:
                msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e)
                return make_response(
                    jsonify({'Message': msg}),
                    HTTP_BAD_REQUEST)
        # Extract a rar
        elif rarfile.is_rarfile(file_):
            r = rarfile.RarFile(file_)
            try:
                r.extractall(path=extract_dir, pwd=password)
                for urfile in r.namelist():
                    unrarred_file = open(os.path.join(extract_dir, urfile))
                    f_name, full_path = save_hashed_filename(unrarred_file, True)
                    tid = queue_task(urfile, f_name, full_path, metadata, rescan=rescan)
                    task_id_list.append(tid)
            except RuntimeError as e:
                msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e)
                return make_response(
                    jsonify({'Message': msg}),
                    HTTP_BAD_REQUEST)
    else:
        # File was not an archive to extract
        f_name, full_path = save_hashed_filename(file_)
        tid = queue_task(original_filename, f_name, full_path, metadata, rescan=rescan)
        task_id_list = [tid]

    msg = {'task_ids': task_id_list}
    return make_response(
        jsonify({'Message': msg}),
        HTTP_CREATED
    )
Example #18
def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG, config=None, module_list=None):
    """
    The meat and potatoes. Returns the list of module results

    Files - A list of files and dirs to be scanned
    recursive - If true it will search the dirs in Files recursively
    configregen - If True a new config file will be created overwriting the old
    configfile - What config file to use. Can be None.
    config - A dictionary containing the configuration options to be used.
    module_list - A list of file paths to be used as modules. Each string should end in .py
    """
    # Redirect stdout to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr
    # TODO: Make sure the cleanup from this works if something breaks

    # Init some vars
    # If recursive is False we don't parse the file list and take it as is.
    if recursive:
        filelist = parseFileList(Files, recursive=recursive)
    else:
        filelist = Files
    # A list of files in the module dir
    if module_list is None:
        module_list = parseDir(MODULESDIR, recursive=True, exclude=["__init__"])
    # A dictionary used for the copyfileto parameter
    filedic = {}
    # What will be the config file object
    config_object = None

    # Read in config
    if configfile:
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        # Regen the config if needed or wanted
        if configregen or not os.path.isfile(configfile):
            _rewrite_config(module_list, config_object, filepath=configfile)

        config_object.read(configfile)
        main_config = _get_main_config(config_object, filepath=configfile)
        if config:
            file_conf = parse_config(config_object)
            for key in config:
                if key not in file_conf:
                    file_conf[key] = config[key]
                    file_conf[key]['_load_default'] = True
                else:
                    file_conf[key].update(config[key])
            config = file_conf
        else:
            config = parse_config(config_object)
    else:
        if config is None:
            config = {}
        else:
            config['_load_default'] = True
        if 'main' in config:
            main_config = config['main']
        else:
            main_config = DEFAULTCONF

    # If none of the files existed
    if not filelist:
        sys.stdout = stdout
        raise ValueError("No valid files")

    # Copy files to a share if configured
    if "copyfilesto" not in main_config:
        main_config["copyfilesto"] = False
    if main_config["copyfilesto"]:
        if os.path.isdir(main_config["copyfilesto"]):
            filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"])
        else:
            sys.stdout = stdout
            raise IOError('The copyfilesto dir "' + main_config["copyfilesto"] + '" is not a valid dir')

    # Create the global module interface
    global_module_interface = _GlobalModuleInterface()

    # Start a thread for each module
    thread_list = _start_module_threads(filelist, module_list, config, global_module_interface)

    # Write the default config settings for any missing modules
    if config_object:
        _write_missing_module_configs(module_list, config_object, filepath=configfile)

    # Warn about spaces in file names
    for f in filelist:
        if ' ' in f:
            print('WARNING: You are using file paths with spaces. This may result in modules not reporting correctly.')
            break

    # Wait for all threads to finish
    thread_wait_list = thread_list[:]
    i = 0
    while thread_wait_list:
        i += 1
        for thread in thread_wait_list:
            if not thread.is_alive():
                i = 0
                thread_wait_list.remove(thread)
                if VERBOSE:
                    print(thread.name, "took", thread.endtime - thread.starttime)
        if i == 15:
            i = 0
            if VERBOSE:
                p = 'Waiting on'
                for thread in thread_wait_list:
                    p += ' ' + thread.name
                p += '...'
                print(p)
        time.sleep(1)

    # Delete copied files
    if main_config["copyfilesto"]:
        for item in filelist:
            try:
                os.remove(item)
            except OSError:
                pass

    # Get Result list
    results = []
    for thread in thread_list:
        if thread.ret is not None:
            results.append(thread.ret)
        del thread

    # Translates file names back to the originals
    if filedic:
        # I have no idea if this is the best way to do in-place modifications
        for i in range(0, len(results)):
            (result, metadata) = results[i]
            modded = False
            for j in range(0, len(result)):
                (filename, hit) = result[j]
                base = basename(filename)
                if base in filedic:
                    filename = filedic[base]
                    modded = True
                    result[j] = (filename, hit)
            if modded:
                results[i] = (result, metadata)

    # Scan subfiles if needed
    subscan_list = global_module_interface._get_subscan_list()
    if subscan_list:
        # Translate from_filename back to original if needed
        if filedic:
            for i in range(0, len(subscan_list)):
                file_path, from_filename, module_name = subscan_list[i]
                base = basename(from_filename)
                if base in filedic:
                    from_filename = filedic[base]
                    subscan_list[i] = (file_path, from_filename, module_name)

        results.extend(_subscan(subscan_list, config, main_config, module_list, global_module_interface))

    global_module_interface._cleanup()

    # Return stdout to previous state
    sys.stdout = stdout
    return results
Example #19
 def setup_class(cls):
     cls.real_mod_dir = multiscanner.MODULESDIR
     multiscanner.MODULESDIR = os.path.join(CWD, "modules")
     cls.filelist = utils.parseDir(os.path.join(CWD, 'files'))
     multiscanner.CONFIG = '.tmpfile.ini'
Example #20
from __future__ import division, absolute_import, with_statement, print_function, unicode_literals
import os
import tempfile

import multiscanner
from multiscanner.common import utils

# Makes sure we use the multiscanner in ../
CWD = os.path.dirname(os.path.abspath(__file__))

module_list = [os.path.join(CWD, 'modules', 'test_conf.py')]
filelist = utils.parseDir(os.path.join(CWD, 'files'))


def test_no_config():
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=None,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}


def test_config_api_no_file():
    config = {'test_conf': {'a': 'z'}}
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=config,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}


def test_config_api_with_empty_file():
    config = {'test_conf': {'a': 'z'}}
Example #21
 def setup_class(cls):
     cls.real_mod_dir = multiscanner.MODULESDIR
     multiscanner.MODULESDIR = os.path.join(CWD, "modules")
     cls.filelist = utils.parseDir(os.path.join(CWD, 'files'))
     cls.files = ['a', 'b', 'C:\\c', '/d/d']
     cls.threadDict = {}
Example #22
def scan(filelist, conf=DEFAULTCONF):
    ruleDir = conf["ruledir"]
    extlist = conf["fileextensions"]
    includes = 'includes' in conf and conf['includes']

    ruleset = {}
    rules = parseDir(ruleDir, recursive=True)
    for r in rules:
        for ext in extlist:
            if r.endswith(ext):
                full_path = os.path.abspath(os.path.join(ruleDir, r))
                ruleset[full_path] = full_path
                break

    # Ran into a weird issue with file locking, this fixes it
    goodtogo = False
    i = 0
    yararules = None
    while not goodtogo:
        try:
            yararules = yara.compile(filepaths=ruleset, includes=includes)
            goodtogo = True
        except yara.SyntaxError as e:
            bad_file = os.path.abspath(str(e).split('(')[0])
            if bad_file in ruleset:
                del ruleset[bad_file]
                print('WARNING: Yara', e)
            else:
                print(
                    'ERROR Yara: Invalid rule in', bad_file,
                    'but we are unable to remove it from our list. Aborting')
                print(e)
                return None

    matches = []
    for m in filelist:
        # Ran into a weird issue with file locking, this fixes it
        goodtogo = False
        i = 0
        while not goodtogo and i < 5:
            try:
                f = open(m, 'rb')
                goodtogo = True
            except Exception as e:
                print('yara:', e)
                time.sleep(3)
                i += 1
        try:
            hit = yararules.match(data=f.read())
        except Exception as e:
            # TODO: log exception
            continue
        finally:
            f.close()
        if hit:
            hdict = {}
            for h in hit:
                if not set(h.tags).intersection(set(conf["ignore-tags"])):
                    hit_dict = {
                        'meta': h.meta,
                        'namespace': h.namespace,
                        'rule': h.rule,
                        'tags': h.tags,
                    }
                    try:
                        h_key = '{}:{}'.format(
                            hit_dict['namespace'].split('/')[-1],
                            hit_dict['rule'])
                    except IndexError:
                        h_key = '{}'.format(hit_dict['rule'])
                    hdict[h_key] = hit_dict
            matches.append((m, hdict))

    metadata = {}
    rulelist = list(ruleset)
    rulelist.sort()
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Rules"] = rulelist
    return (matches, metadata)
Example #23
 def setup_class(cls):
     cls.real_mod_dir = multiscanner.MODULESDIR
     multiscanner.MODULESDIR = os.path.join(CWD, "modules")
     cls.filelist = utils.parseDir(os.path.join(CWD, 'files'))
     multiscanner.CONFIG = '.tmpfile.ini'
Example #24
def test_parseDir():
    path = os.path.abspath(os.path.join(MS_WD, 'tests', 'dir_test'))
    result = utils.parseDir(path, recursive=False)
    expected = [os.path.join(path, '1.1.txt'), os.path.join(path, '1.2.txt')]
    assert sorted(result) == sorted(expected)