Example #1
def test_no_config():
    results, metadata = multiscanner.multiscan(filelist,
                                               configfile=None,
                                               config=None,
                                               recursive=None,
                                               module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}
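Example #1 unpacks the first (results, metadata) tuple that multiscan() returns; the fuller examples below pass the whole return value to parse_reports(), which collates the per-module tuples into a single report keyed by filename. A minimal sketch of that end-to-end flow, assuming the installed multiscanner package with its default configuration and an illustrative file path:

import multiscanner

# multiscan() returns one (results, metadata) tuple per module
resultlist = multiscanner.multiscan(['/tmp/sample.bin'])  # path is illustrative
# parse_reports(..., python=True) collates them into {filename: report}
report = multiscanner.parse_reports(resultlist, python=True)
for filename, module_results in report.items():
    print(filename, module_results)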
Example #2
def test_config_api_with_real_file():
    config = {'test_conf': {'a': 'z'}}
    config_file = tempfile.mkstemp()[1]
    multiscanner.config_init(config_file)
    results, metadata = multiscanner.multiscan(filelist, configfile=config_file, config=config, recursive=None, module_list=module_list)[0]
    os.remove(config_file)
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}
Example #3
def multiscanner_process(work_queue, exit_signal):
    '''Not used in distributed mode.
    '''
    metadata_list = []
    time_stamp = None
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < batch_size:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            if metadata_list and time_stamp:
                if len(metadata_list) >= batch_size:
                    pass
                elif time.time() - time_stamp > batch_interval:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        # modulelist = [item[5] for item in metadata_list]
        resultlist = multiscanner.multiscan(
            filelist, configfile=multiscanner.CONFIG
            # module_list
        )
        results = multiscanner.parse_reports(resultlist, python=True)

        scan_time = datetime.now().isoformat()

        if delete_after_scan:
            for file_name in results:
                os.remove(file_name)

        for item in metadata_list:
            # Use the original filename as the index instead of the full path
            results[item[1]] = results[item[0]]
            del results[item[0]]

            results[item[1]]['Scan Metadata'] = item[4]
            results[item[1]]['Scan Metadata']['Scan Time'] = scan_time
            results[item[1]]['Scan Metadata']['Task ID'] = item[2]

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                timestamp=scan_time,
            )
        metadata_list = []

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
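The try/except queue.Empty block above implements a batch-or-timeout drain: items accumulate until either batch_size is reached or batch_interval seconds have passed since the first item arrived. A stripped-down sketch of just that pattern, with illustrative names and no MultiScanner dependencies:

import queue
import time

def drain_batches(work_queue, batch_size=10, batch_interval=5):
    '''Yield batches from work_queue, flushing on size or age.'''
    batch = []
    first_seen = None
    while True:
        time.sleep(1)
        try:
            batch.append(work_queue.get_nowait())
            if first_seen is None:
                first_seen = time.time()
            while len(batch) < batch_size:
                batch.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush only if the batch is full or the oldest item is stale
            if not batch:
                continue
            if len(batch) < batch_size and time.time() - first_seen <= batch_interval:
                continue
        yield batch
        batch = []
        first_seen = None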
Example #4
def celery_task(files, config=multiscanner.CONFIG):
    '''
    Run multiscanner on the given file and store the results in the storage
    handler(s) specified in the storage configuration file.
    '''
    # Get the storage config
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(
        configfile=storage_conf)

    resultlist = multiscanner.multiscan(list(files), configfile=config)
    results = multiscanner.parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Loop through files in a way compatible with Py 2 and 3, and won't be
    # affected by changing keys to original filenames
    for file_ in files:
        original_filename = files[file_]['original_filename']
        task_id = files[file_]['task_id']
        file_hash = files[file_]['file_hash']
        metadata = files[file_]['metadata']
        # Get the Scan Config that the task was run with and
        # add it to the task metadata
        scan_config_object = configparser.SafeConfigParser()
        scan_config_object.optionxform = str
        scan_config_object.read(config)
        full_conf = common.parse_config(scan_config_object)
        sub_conf = {}
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
        results[file_]['Scan Metadata'] = {}
        results[file_]['Scan Metadata']['Worker Node'] = gethostname()
        results[file_]['Scan Metadata']['Scan Config'] = sub_conf

        # Use the original filename as the value for the filename
        # in the report (instead of the tmp path assigned to the file
        # by the REST API)
        results[original_filename] = results[file_]
        del results[file_]

        results[original_filename]['Scan Time'] = scan_time
        results[original_filename]['Metadata'] = metadata

        # Update the task DB to reflect that the task is done
        db.update_task(
            task_id=task_id,
            task_status='Complete',
            timestamp=scan_time,
        )

    # Save the reports to storage
    storage_handler.store(results, wait=False)
    storage_handler.close()

    return results
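The loop over files implies the shape of the argument celery_task() expects: a mapping from the scanned (temporary) path to the bookkeeping needed to re-key the report and update the task DB. A hypothetical invocation, with placeholder paths and IDs, might look like this:

# Hypothetical call; the temp path, task ID, hash, and metadata are placeholders.
files = {
    '/tmp/uploads/abc123': {
        'original_filename': 'invoice.pdf',
        'task_id': 42,
        'file_hash': 'abc123',
        'metadata': {'Submitter': 'analyst1'},
    },
}
report = celery_task(files)  # config defaults to multiscanner.CONFIG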
Example #5
def test_config_api_no_file():
    config = {'test_conf': {'a': 'z'}}
    results, metadata = multiscanner.multiscan(filelist,
                                               configfile=None,
                                               config=config,
                                               recursive=None,
                                               module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}
Example #6
 def setup(self):
     self.result = multiscanner.multiscan(self.filelist,
                                          recursive=False,
                                          configregen=False,
                                          configfile='.tmpfile.ini')
     self.report = multiscanner.parse_reports(self.result,
                                              includeMetadata=False,
                                              python=True)
     self.report_m = multiscanner.parse_reports(self.result,
                                                includeMetadata=True,
                                                python=True)
Example #7
def test_subscan():
    m = multiscanner.multiscan(
        ['fake.zip'],
        recursive=None,
        configfile=None,
        module_list=[os.path.join(CWD, 'modules', 'test_subscan.py')])
    assert m == [([(u'fake.zip', 0)], {
        'Type': 'Test',
        'Name': 'test_subscan'
    }),
                 ([(u'fake.zip/0', u'fake.zip')], {
                     u'Include': False,
                     u'Type': u'subscan',
                     u'Name': u'Parent'
                 }),
                 ([(u'fake.zip', [u'fake.zip/0'])], {
                     u'Include': False,
                     u'Type': u'subscan',
                     u'Name': u'Children'
                 }),
                 ([(u'fake.zip/0', u'test_subscan')], {
                     u'Include': False,
                     u'Type': u'subscan',
                     u'Name': u'Created by'
                 }),
                 ([(u'fake.zip/0', 1)], {
                     'Type': 'Test',
                     'Name': 'test_subscan'
                 }),
                 ([(u'fake.zip/0/1', u'fake.zip/0')], {
                     u'Include': False,
                     u'Type': u'subscan',
                     u'Name': u'Parent'
                 }),
                 ([(u'fake.zip/0', [u'fake.zip/0/1'])], {
                     u'Include': False,
                     u'Type': u'subscan',
                     u'Name': u'Children'
                 }),
                 ([(u'fake.zip/0/1', u'test_subscan')], {
                     u'Include': False,
                     u'Type': u'subscan',
                     u'Name': u'Created by'
                 }),
                 ([(u'fake.zip/0/1', 2)], {
                     'Type': 'Test',
                     'Name': 'test_subscan'
                 })]  # noqa: E501
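The expected value above shows that subscans add bookkeeping entries ('Parent', 'Children', 'Created by') whose metadata carries Type 'subscan' and Include False, interleaved with ordinary module results. A small sketch, assuming only that convention, that separates the two kinds of entries in a raw multiscan() return:

def split_subscan_entries(resultlist):
    '''Partition multiscan() output into module results and subscan bookkeeping.'''
    module_results, subscan_entries = [], []
    for results, metadata in resultlist:
        if metadata.get('Type') == 'subscan':
            subscan_entries.append((results, metadata))
        else:
            module_results.append((results, metadata))
    return module_results, subscan_entries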
Example #8
def multiscanner_process(work_queue, exit_signal):
    metadata_list = []
    time_stamp = None
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < BATCH_SIZE:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            if metadata_list and time_stamp:
                if len(metadata_list) >= BATCH_SIZE:
                    pass
                elif time.time() - time_stamp > WAIT_SECONDS:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        resultlist = multiscanner.multiscan(
            filelist, configfile=multiscanner.CONFIG
        )
        results = multiscanner.parse_reports(resultlist, python=True)

        for file_name in results:
            os.remove(file_name)

        for item in metadata_list:

            results[item[1]] = results[item[0]]
            del results[item[0]]

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                report_id=item[3]
            )

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
Example #9
def multiscanner_process(work_queue, exit_signal):
    metadata_list = []
    time_stamp = None
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < BATCH_SIZE:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            if metadata_list and time_stamp:
                if len(metadata_list) >= BATCH_SIZE:
                    pass
                elif time.time() - time_stamp > WAIT_SECONDS:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        resultlist = multiscanner.multiscan(filelist,
                                            configfile=multiscanner.CONFIG)
        results = multiscanner.parse_reports(resultlist, python=True)

        for file_name in results:
            os.remove(file_name)

        for item in metadata_list:

            results[item[1]] = results[item[0]]
            del results[item[0]]

            db.update_task(task_id=item[2],
                           task_status='Complete',
                           report_id=item[3])

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
Example #10
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete,
                         exit_signal):
    filelist = []
    time_stamp = None
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(
        configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscanner.multiscan(filelist, configfile=config)
        results = multiscanner.parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        filelist = []
        time_stamp = None
    storage_handler.close()
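Because the loop condition checks exit_signal.value, this worker is presumably driven from a parent process using multiprocessing primitives. A hypothetical launcher, assuming a multiprocessing.Queue, a shared boolean Value, and placeholder batch settings:

import multiprocessing

import multiscanner

# Hypothetical launcher; the queued path and batch settings are placeholders.
if __name__ == '__main__':
    work_queue = multiprocessing.Queue()
    exit_signal = multiprocessing.Value('b', False)
    proc = multiprocessing.Process(
        target=multiscanner_process,
        args=(work_queue, multiscanner.CONFIG, 10, 5, False, exit_signal))
    proc.start()
    work_queue.put('/tmp/sample.bin')
    # ...later, signal the worker loop to stop
    exit_signal.value = True
    proc.join()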
Example #11
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal):
    filelist = []
    time_stamp = None
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscan(filelist, configfile=config)
        results = parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        filelist = []
        time_stamp = None
    storage_handler.close()
Example #12
def test_subscan():
    m = multiscanner.multiscan(
        ['fake.zip'], recursive=None, configfile=None,
        module_list=[os.path.join(CWD, 'modules', 'test_subscan.py')])
    assert m == [([(u'fake.zip', 0)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0', u'fake.zip')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip', [u'fake.zip/0'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0', 1)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0/1', u'fake.zip/0')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip/0', [u'fake.zip/0/1'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0/1', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0/1', 2)], {'Type': 'Test', 'Name': 'test_subscan'})]    # noqa: E501
Example #13
 def setup(self):
     self.result = multiscanner.multiscan(
         self.filelist, recursive=False, configregen=False, configfile='.tmpfile.ini')
     self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True)
     self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True)
Example #14
def multiscanner_process(work_queue, exit_signal):
    '''Not used in distributed mode.
    '''
    metadata_list = []
    time_stamp = None
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < batch_size:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            if metadata_list and time_stamp:
                if len(metadata_list) >= batch_size:
                    pass
                elif time.time() - time_stamp > batch_interval:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        # modulelist = [item[5] for item in metadata_list]
        resultlist = multiscan(
            filelist, configfile=MS_CONFIG
            # module_list
        )
        results = parse_reports(resultlist, python=True)

        scan_time = datetime.now().isoformat()

        if delete_after_scan:
            for file_name in results:
                os.remove(file_name)

        for item in metadata_list:
            # Use the original filename as the index instead of the full path
            results[item[1]] = results[item[0]]
            del results[item[0]]

            results[item[1]]['Scan Metadata'] = item[4]
            results[item[1]]['Scan Metadata']['Scan Time'] = scan_time
            results[item[1]]['Scan Metadata']['Task ID'] = item[2]
            results[item[1]]['tags'] = results[item[1]]['Scan Metadata'].get('Tags', '').split(',')
            results[item[1]]['Scan Metadata'].pop('Tags', None)

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                timestamp=scan_time,
            )
        metadata_list = []

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
Example #15
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=multiscanner.CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''
    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('='*48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscanner.multiscan(
        [file_],
        configfile=config,
        module_list=module_list
    )
    results = multiscanner.parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str
    scan_config_object.read(config)
    full_conf = common.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    total_modules = 0
    for key in full_conf:
        if key == 'main':
            continue
        sub_conf[key] = {}
        sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
        total_modules += 1
        if sub_conf[key]['ENABLED'] is True:
            total_enabled += 1

    results[file_]['Scan Metadata'] = {}
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules
    )

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    results[original_filename]['Scan Time'] = scan_time
    results[original_filename]['Metadata'] = metadata

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )

    # Save the reports to storage
    storage_handler.store(results, wait=False)
    storage_handler.close()
    logger.info('Completed Task #{}'.format(task_id))

    return results
Example #16
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=MS_CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''

    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscan(
        [file_],
        configfile=config,
        module_list=module_list
    )
    results = parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str
    scan_config_object.read(config)
    full_conf = utils.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    total_modules = len(full_conf.keys())

    # Get the count of modules enabled from the module_list
    # if it exists, else count via the config
    if module_list:
        total_enabled = len(module_list)
    else:
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
            if sub_conf[key]['ENABLED'] is True:
                total_enabled += 1

    results[file_]['Scan Metadata'] = metadata
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules
    )
    results[file_]['Scan Metadata']['Scan Time'] = scan_time
    results[file_]['Scan Metadata']['Task ID'] = task_id

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    # Save the reports to storage
    storage_ids = storage_handler.store(results, wait=False)
    storage_handler.close()

    # Only need to raise ValueError here,
    # Further cleanup will be handled by the on_failure method
    # of MultiScannerTask
    if not storage_ids:
        raise ValueError('Report failed to index')

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )

    logger.info('Completed Task #{}'.format(task_id))

    return results
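The delay() call in the docstring can be made concrete. A hypothetical enqueue with placeholder values, assuming the function is registered as a Celery task (the decorator is elided from this excerpt) and a worker is running:

from celery_worker import multiscanner_celery

# Hypothetical; path, task ID, hash, and metadata are placeholders.
multiscanner_celery.delay(
    '/tmp/uploads/abc123',      # full path the REST API saved the upload to
    'invoice.pdf',              # original filename used to key the report
    42,                         # task ID tracked in the task DB
    'abc123',                   # file hash
    {'Submitter': 'analyst1'},  # user-supplied metadata
)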
Example #17
def parse_args():
    """
    Parses arguments
    """
    import argparse
    # argparse stuff
    parser = argparse.ArgumentParser(description="Scan files and store results in elastic search")
    parser.add_argument("-r", "--recursive", action="store_true")
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument('Files', help="Files and Directories to attach", nargs='+')
    return parser.parse_args()

def results2es(results):
    """
    Takes a dictionary of {filename: results} and stores each result in Elasticsearch.
    """
    es = elasticsearch.Elasticsearch(hosts=ES_HOSTS)
    es.indices.create(index=ES_INDEX, ignore=400)
    for fname in results:
        result = results[fname]
        result['filename'] = fname
        es.index(index=ES_INDEX, doc_type=ES_DOCTYPE, id=result['SHA256'], body=result)

if __name__ == '__main__':
    args = parse_args()
    print("Starting scan...")
    results = multiscanner.multiscan(args.Files, recursive=args.recursive)
    results = multiscanner.parse_reports(results, python=True, includeMetadata=False)
    print("Storing results...")
    results2es(results)
    print("Done!")
Example #18
def test_no_config():
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=None,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}
Example #19
def test_config_api_no_file():
    config = {'test_conf': {'a': 'z'}}
    results, metadata = multiscanner.multiscan(
        filelist, configfile=None, config=config,
        recursive=None, module_list=module_list)[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}