示例#1
0
 def test_dont_dest_archive_payload(self):
     """A payload flagged should_archive=False must skip dest archivers,
     while its extracted child payloads are still archived."""
     stoq = Stoq(base_dir=utils.get_data_dir(),
                 dest_archivers=['dummy_archiver'])
     archiver_plugin = stoq.load_plugin('dummy_archiver')
     archiver_plugin.archive = create_autospec(archiver_plugin.archive,
                                               return_value=None)
     response = stoq.scan(
         self.generic_content,
         payload_meta=PayloadMeta(should_archive=False),
         add_start_dispatch=['extract_random'],
         request_meta=RequestMeta(archive_payloads=True),
     )
     # Only the extracted payload should have been archived
     archiver_plugin.archive.assert_called_once()
     root_result = response.results[0]
     extracted_result = response.results[1]
     self.assertNotIn('dummy_archiver', root_result.plugins_run['archivers'])
     self.assertIn('dummy_archiver', extracted_result.plugins_run['archivers'])
示例#2
0
    def scan(self, payload: Payload,
             request_meta: RequestMeta) -> WorkerResponse:
        """
        Extract attachments from a TNEF (winmail.dat) payload.

        :param payload: Payload containing TNEF content
        :param request_meta: Originating request metadata (unused here)
        :return: WorkerResponse with one ExtractedPayload per attachment
        """
        extracted = []
        tnef_results = TNEF(payload.content)

        if tnef_results.attachments:
            for tnef_attachment in tnef_results.attachments:
                try:
                    # Attachment names may be bytes in an unknown encoding;
                    # let UnicodeDammit guess the charset
                    filename = UnicodeDammit(
                        tnef_attachment.name).unicode_markup
                except Exception:
                    filename = "None"
                tnef_meta = PayloadMeta(extra_data={'filename': filename})
                attachment = ExtractedPayload(tnef_attachment.data, tnef_meta)
                # Bug fix: append the single ExtractedPayload; extend() would
                # try to iterate over the payload object itself
                extracted.append(attachment)

        return WorkerResponse({}, extracted=extracted)
示例#3
0
 def scan(self, payload: Payload,
          request_meta: RequestMeta) -> WorkerResponse:
     """
     Carve configured elements out of an XDP (XML) payload.

     :param payload: Payload containing XML content
     :param request_meta: Originating request metadata (unused here)
     :return: WorkerResponse with one ExtractedPayload per carved element
     """
     extracted = []
     errors = []
     try:
         parsed_xml = parseString(payload.content)
     except ExpatError as err:
         errors.append(
             f'Unable to parse payload as XML with xdpcarve: {err}')
         return WorkerResponse(errors=errors)
     for name in self.elements:
         dom_element = parsed_xml.getElementsByTagName(name)
         for dom in dom_element:
             # Robustness: elements with no text node have firstChild=None,
             # which previously raised AttributeError; skip them instead
             if dom.firstChild is None:
                 continue
             content = dom.firstChild.nodeValue
             content = content.rstrip()
             try:
                 # Element content is commonly base64 encoded; fall back to
                 # the raw text when it is not (binascii.Error is a
                 # ValueError subclass, so this replaces the bare except)
                 content = base64.b64decode(content)
             except ValueError:
                 pass
             meta = PayloadMeta(extra_data={"element_name": name})
             extracted.append(ExtractedPayload(content, meta))
     return WorkerResponse(extracted=extracted, errors=errors)
示例#4
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        Carve PE files from provided payload

        :param payload: Payload to carve PE files from
        :param request: Originating request (unused here)
        :return: WorkerResponse with one ExtractedPayload per carved PE
        """

        extracted = []
        content = BytesIO(payload.content)
        content.seek(0)

        for start, end in self._carve(content):
            content.seek(start)
            try:
                pe = pefile.PE(data=content.read())
            except pefile.PEFormatError:
                # Candidate offset was not a valid PE header; keep scanning.
                # (Replaces a bare except that also swallowed unrelated
                # exceptions such as KeyboardInterrupt.)
                continue
            meta = PayloadMeta(extra_data={'offset': start})
            # pe.trim() drops trailing data beyond the PE's declared size
            extracted.append(ExtractedPayload(pe.trim(), meta))
            content.seek(0)
            pe.close()

        return WorkerResponse(extracted=extracted)
示例#5
0
    async def ingest(self, queue: Queue) -> None:
        """
        Monitor a directory for newly created files for ingest

        """

        self.log.info(
            f'Monitoring {self.source_dir} for newly created files...')
        async for changes in awatch(self.source_dir):
            for event, changed_path in list(changes):
                # Only handle Change.added
                if event != 1:
                    continue
                src_path = os.path.abspath(changed_path)
                meta = PayloadMeta(
                    extra_data={
                        'filename': os.path.basename(src_path),
                        'source_dir': os.path.dirname(src_path),
                    })
                with open(src_path, 'rb') as f:
                    await queue.put(Payload(f.read(), meta))
示例#6
0
    def decompress(self, content: BytesIO, offset: int = 0):
        """
        Extract and decompress an SWF object

        :param content: Buffer containing a candidate SWF object
        :param offset: Offset into the buffer where the SWF header starts
        :return: Tuple of (ExtractedPayload or None, list of error strings)
        """
        errors = []
        meta = None
        swf = None
        # Bug fix: extracted must be initialized here, otherwise the
        # size-mismatch and exception paths below raised UnboundLocalError
        # at the final return
        extracted = None
        try:
            """
            Header as obtained from SWF File Specification:
            Field Type Comment
            Signature UI8 Signature byte:
                - “F” indicates uncompressed
                - “C” indicates a zlib compressed SWF (SWF 6 and later only)
                - “Z” indicates a LZMA compressed SWF (SWF 13 and later only)
            - Signature UI8 Signature byte always “W”
            - Signature UI8 Signature byte always “S”
            - Version UI8 Single byte file version (for example, 0x06 for SWF 6)
            - FileLength UI32 Length of entire file in bytes
            """
            # Jump to the proper offset
            content.seek(offset)
            # Grab the first three bytes, should be FWS, CWS or ZWS
            magic = content.read(3).decode()
            # Grab the SWF version - 1 byte
            swf_version = struct.unpack('<b', content.read(1))[0]
            # Grab next 4 bytes so we can unpack to calculate the uncompressed
            # size of the payload.
            decompressed_size = struct.unpack("<i", content.read(4))[0] - 8
            # Let's go back to the offset byte, jumping beyond the SWF header
            content.seek(offset + 3)
            # Make sure our header is that of a decompressed SWF plus the
            # original version and size headers
            composite_header = b'FWS' + content.read(5)
            # Determine the compression type, ZLIB or LZMA, then decompress the
            # payload size minus 8 bytes of original header
            try:
                if magic == "ZWS":
                    content.seek(12)
                    content = pylzma.decompress(
                        content.read(decompressed_size))
                elif magic == "CWS":
                    content = zlib.decompress(content.read(decompressed_size))
                elif magic == 'FWS':
                    # Not compressed, but let's return the payload based on the
                    # size defined in the header
                    content = content.read(decompressed_size)
                else:
                    return None, errors
            except Exception:
                # Decompression failed; not a valid SWF at this offset
                return None, errors

            if len(content) != decompressed_size:
                errors.append(
                    # Bug fix: this message was missing its f-string prefix
                    f'Invalid size of carved SWF content: {len(content)} != {decompressed_size}'
                )
            else:
                swf = composite_header + content
                meta = PayloadMeta(extra_data={
                    'offset': offset,
                    'swf_version': swf_version
                })
                extracted = ExtractedPayload(swf, meta)
        except Exception:
            errors.append(
                f'Unable to decompress SWF payload at offset {offset}')
        return extracted, errors
示例#7
0
 def test_reconstruct_all_subresponses(self):
     # Construct a fake stoq_response as if it were generated from a file
     # A.zip that contains two files, B.txt and C.zip, where C.zip contains
     # D.txt. Table rows: (payload_id, worker result, extracted_from parent)
     payload_specs = [
         ("A.zip", "result1", None),
         ("B.txt", "result2", "A.zip"),
         ("C.zip", "result3", "A.zip"),
         ("D.txt", "result4", "C.zip"),
     ]
     results = []
     for payload_id, worker_result, parent in payload_specs:
         kwargs = dict(
             payload_id=payload_id,
             size=0,
             payload_meta=PayloadMeta(),
             workers=[{"fake": worker_result}],
             plugins_run={"workers": [["fake"]]},
         )
         if parent is not None:
             kwargs["extracted_from"] = parent
             kwargs["extracted_by"] = "fake"
         results.append(PayloadResults(**kwargs))
     initial_response = StoqResponse(
         results=results,
         request_meta=RequestMeta(extra_data={"check": "me"}),
         errors={},
     )
     s = Stoq(base_dir=utils.get_data_dir(), decorators=["simple_decorator"])
     all_subresponses = list(s.reconstruct_all_subresponses(initial_response))
     # We expect there to be four "artificial" responses generated, one for
     # each payload as the root.
     self.assertEqual(len(all_subresponses), 4)
     # We expect the first response to have all 4 payloads, the second response
     # to have just the second payload, the third response to have the third
     # and fourth payload, and the fourth response to have just the fourth payload
     self.assertEqual(
         [len(resp.results) for resp in all_subresponses], [4, 1, 2, 1]
     )
     self.assertEqual(
         [resp.results[0].workers[0]["fake"] for resp in all_subresponses],
         ["result1", "result2", "result3", "result4"],
     )
     self.assertTrue(
         all(
             "simple_decorator" in resp.decorators
             for resp in all_subresponses
         )
     )
     # Assert that they all have the same scan ID
     self.assertEqual(
         len({resp.scan_id for resp in all_subresponses}), 1
     )
示例#8
0
 def test_payloadmeta_to_str(self):
     """str() of a default PayloadMeta must be valid JSON for a dict."""
     meta = PayloadMeta()
     as_text = str(meta)
     parsed = json.loads(as_text)
     self.assertIsInstance(as_text, str)
     self.assertIsInstance(parsed, dict)
示例#9
0
def main() -> None:
    """Command line entry point for stoQ.

    Parses arguments for the scan/run/list/install/test subcommands,
    builds plugin options and request metadata from the command line,
    then instantiates ``Stoq`` as needed and dispatches to the requested
    action.
    """
    about = f'stoQ :: v{__version__} :: an automated analysis framework'
    # If $STOQ_HOME exists, set our base directory to that, otherwise
    # use $HOME/.stoq
    try:
        stoq_home = str(
            Path(os.getenv('STOQ_HOME', f'{str(Path.home())}/.stoq')).resolve(
                strict=True
            )
        )
    except FileNotFoundError as err:
        print(f"$STOQ_HOME is invalid, exiting: {err}", file=sys.stderr)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=about,
        epilog='''
Examples:

    - Scan a file with installed plugins and dispatch rules:

    $ %(prog)s scan mybadfile.exe

    - Scan a file and force it to go through the yara plugin:

    $ %(prog)s scan mybadfile.exe -s yara

    - Ingest from PubSub, force all payloads through yara, trid, and exif,
      then save results to file:

    $ %(prog)s run -a yara trid exif -P pubsub -C file

    - Monitor a directory (specified in dirmon.stoq) for newly created files
      send them to workers, and archive all payloads into MongoDB:

    $ %(prog)s run -P dirmon -A mongodb

    - Install a plugin from a directory

    $ %(prog)s install path/to/plugin_directory

    ''',
    )
    subparsers = parser.add_subparsers(title='commands', dest='command')
    subparsers.required = True

    scan = subparsers.add_parser('scan', help='Scan a given payload')
    scan.add_argument(
        'file',
        nargs='?',
        type=argparse.FileType('rb'),
        default=sys.stdin.buffer,
        help='File to scan, can also be provided from stdin',
    )

    run = subparsers.add_parser(
        'run', help='Continually ingest and scan payloads from Provider plugins'
    )
    run.add_argument(
        '-P', '--providers', nargs='+', help='Provider plugins to ingest payloads from'
    )

    # Add shared arguments so they still show up in the help dialog
    for subparser in [scan, run]:
        subparser.add_argument(
            '-A',
            '--dest-archivers',
            nargs='+',
            help='Archiver plugins to send payloads to',
        )
        subparser.add_argument(
            '-S',
            '--source-archivers',
            nargs='+',
            help='Archiver plugins to read payload from',
        )
        subparser.add_argument(
            '-D',
            '--decorators',
            nargs='+',
            help='Decorator plugins to send results to before saving',
        )
        subparser.add_argument(
            '-C', '--connectors', nargs='+', help='Connector plugins to send results to'
        )
        subparser.add_argument(
            '-R',
            '--dispatchers',
            nargs='+',
            help='Dispatcher plugins to use send payloads to',
        )
        subparser.add_argument(
            '-a',
            '--always-dispatch',
            nargs='+',
            help='Worker plugins to always dispatch plugins to',
        )
        subparser.add_argument(
            '-s',
            '--start-dispatch',
            nargs='+',
            help='Worker plugins to add to the original payload dispatch',
        )
        subparser.add_argument(
            '--max-recursion',
            type=int,
            default=None,
            help='Maximum level of recursion into a payload and extracted payloads',
        )
        subparser.add_argument('--plugin-opts', nargs='+', help='Plugin options')
        subparser.add_argument(
            '--request-source',
            default=None,
            help='Source name to add to initial scan request',
        )
        subparser.add_argument(
            '--request-extra',
            nargs='+',
            help='Key/value pair to add to initial scan request metadata',
        )
        subparser.add_argument(
            '--plugin-dir', nargs='+', help='Directory(ies) containing stoQ plugins'
        )
        subparser.add_argument(
            '--config-file',
            default=f'{stoq_home}/stoq.cfg',
            help='Path to stoQ configuration file',
        )
        subparser.add_argument(
            '--log-level',
            default=None,
            # Bug fix: a missing comma concatenated the last two choices into
            # the single invalid option 'errorcrtical'; 'critical' was also
            # misspelled
            choices=['debug', 'info', 'warning', 'error', 'critical'],
            help='Log level for stoQ events',
        )

    plugin_list = subparsers.add_parser('list', help='List available plugins')
    plugin_list.add_argument(
        '--plugin-dir', nargs='+', help='Directory(ies) containing stoQ plugins'
    )

    install = subparsers.add_parser('install', help='Install a given plugin')
    install.add_argument(
        'plugin_path', help='Directory or Github repo of the plugin to install'
    )
    install.add_argument(
        '--install_dir',
        default=os.path.join(stoq_home, 'plugins'),
        help='Override the default plugin installation directory',
    )
    install.add_argument(
        '--upgrade',
        action='store_true',
        help='Force the plugin to be upgraded if it already exists',
    )
    install.add_argument(
        '--github', action='store_true', help='Install plugin from Github repository'
    )

    subparsers.add_parser('test', help='Run stoQ tests')
    args = parser.parse_args()

    # Parse 'plugin:key=value' options into a nested dict keyed by plugin name
    plugin_opts: Union[Dict, None] = None
    try:
        if args.plugin_opts:
            plugin_opts = {}
            for arg in args.plugin_opts:
                plugin_name, plugin_option = arg.split(':', 1)
                opt, value = plugin_option.split('=', 1)
                if value.lower() == 'true':
                    value = True
                elif value.lower() == 'false':
                    value = False
                if plugin_name in plugin_opts:
                    plugin_opts[plugin_name].update({opt: value})
                else:
                    plugin_opts[plugin_name] = {opt: value}
    except AttributeError:
        # Subcommand without --plugin-opts (e.g. list/install/test)
        pass
    except ValueError as err:
        print(f'Failed parsing plugin option: {err}')

    request_meta = RequestMeta()
    try:
        if args.request_source:
            request_meta.source = args.request_source
        if args.request_extra:
            for arg in args.request_extra:
                extra_key, extra_value = arg.split('=', 1)
                if extra_value.lower() == 'true':
                    extra_value = True
                elif extra_value.lower() == 'false':
                    extra_value = False
                request_meta.extra_data[extra_key] = extra_value
    except AttributeError:
        # Subcommand without request metadata options
        pass
    except ValueError as err:
        print(f'Failed parsing request metadata option: {err}')

    try:
        if not os.path.isfile(args.config_file):
            print(f'Warning: {args.config_file} does not exist, using stoQ defaults!')
    except AttributeError:
        pass

    if args.command == 'scan':
        with args.file as f:
            # Verify that the file or stdin has some sort of data
            if not select.select([f], [], [], 0.0)[0]:
                print('Error: No content to scan was provided')
                sys.exit(2)
            content = f.read()
        if not content:
            print('Error: The provided content to scan was empty')
            sys.exit(2)

        if args.file.name == '<stdin>':
            filename = None
        else:
            path = args.file.name
            try:
                filename = os.path.basename(path.encode('utf-8'))
            except AttributeError:
                filename = os.path.basename(path)

        stoq = Stoq(
            base_dir=stoq_home,
            config_file=args.config_file,
            log_level=args.log_level,
            plugin_opts=plugin_opts,
            source_archivers=args.source_archivers,
            dest_archivers=args.dest_archivers,
            connectors=args.connectors,
            dispatchers=args.dispatchers,
            decorators=args.decorators,
            always_dispatch=args.always_dispatch,
            max_recursion=args.max_recursion,
            plugin_dir_list=args.plugin_dir,
        )
        response = asyncio.get_event_loop().run_until_complete(
            stoq.scan(
                content,
                PayloadMeta(extra_data={'filename': filename}),
                request_meta=request_meta,
                add_start_dispatch=args.start_dispatch,
            )
        )
        # Only print to stdout when no connector is handling the results
        if not args.connectors:
            print(response)
    elif args.command == 'run':
        stoq = Stoq(
            base_dir=stoq_home,
            config_file=args.config_file,
            log_level=args.log_level,
            plugin_opts=plugin_opts,
            providers=args.providers,
            source_archivers=args.source_archivers,
            dest_archivers=args.dest_archivers,
            connectors=args.connectors,
            dispatchers=args.dispatchers,
            decorators=args.decorators,
            always_dispatch=args.always_dispatch,
            max_recursion=args.max_recursion,
            plugin_dir_list=args.plugin_dir,
        )
        asyncio.get_event_loop().run_until_complete(
            stoq.run(request_meta=request_meta, add_start_dispatch=args.start_dispatch)
        )
    elif args.command == 'list':
        stoq = Stoq(base_dir=stoq_home, plugin_dir_list=args.plugin_dir)
        print(about)
        print('-' * len(about))
        for name, info in stoq.list_plugins().items():
            print(f'{name:<20s} v{info["version"]:<10s}{info["description"]}')
            print(f'\t\t\t\t- {", ".join(info["classes"]):<20s}')

    elif args.command == 'install':
        StoqPluginInstaller.install(
            args.plugin_path, args.install_dir, args.upgrade, args.github
        )
        print(f'Successfully installed {args.plugin_path} into {args.install_dir}')
    elif args.command == 'test':
        test_path = os.path.dirname(tests.__file__)
        test_suite = unittest.TestLoader().discover(test_path)
        unittest.TextTestRunner(verbosity=1).run(test_suite)
示例#10
0
    def scan(self, payload: Payload,
             request_meta: RequestMeta) -> WorkerResponse:
        """
        Decompress a payload

        request_meta:
            - passwords
            - archiver
        """

        if len(payload.content) > self.maximum_size:
            raise StoqPluginException(
                f'Compressed file too large: {len(payload.content)} > {self.maximum_size}'
            )

        archiver = None
        mimetype = None
        results = {}
        errors = []
        extracted = []
        passwords = request_meta.extra_data.get('passwords', self.passwords)
        if isinstance(passwords, str):
            passwords = [p.strip() for p in passwords.split(',')]

        # Determine the mimetype of the payload so we can identify the
        # correct archiver. This should either be based off the request_meta
        # (useful when payload is passed via dispatching) or via magic
        if 'archiver' in request_meta.extra_data:
            if request_meta.extra_data['archiver'] in self.ARCHIVE_CMDS:
                archiver = self.ARCHIVE_CMDS[
                    request_meta.extra_data['archiver']]
            else:
                # Bug fix: the error path subscripted request_meta directly
                # instead of reading from extra_data like the branch above
                raise StoqPluginException(
                    f"Unknown archive type of {request_meta.extra_data['archiver']}")
        else:
            mimetype = magic.from_buffer(payload.content, mime=True)
            if mimetype in self.ARCHIVE_MAGIC:
                archive_type = self.ARCHIVE_MAGIC[mimetype]
                if archive_type in self.ARCHIVE_CMDS:
                    archiver = self.ARCHIVE_CMDS[archive_type]
                else:
                    raise StoqPluginException(
                        f'Unknown archive type of {archive_type}')
        if not archiver:
            raise StoqPluginException(
                f'Unable to determine archive type, mimetype: {mimetype}')

        with tempfile.TemporaryDirectory() as extract_dir:
            # Write the compressed payload to disk for the external archiver
            fd, archive_file = tempfile.mkstemp(dir=extract_dir)
            with open(fd, 'xb') as f:
                f.write(payload.content)
                f.flush()
            archive_outdir = tempfile.mkdtemp(dir=extract_dir)
            # Try each candidate password until the archiver exits cleanly
            for password in passwords:
                cmd = archiver.replace('%INFILE%', shlex.quote(archive_file))
                cmd = cmd.replace('%OUTDIR%', shlex.quote(archive_outdir))
                cmd = cmd.replace('%PASSWORD%', shlex.quote(password))
                cmd = cmd.split(" ")
                p = Popen(cmd,
                          stdout=PIPE,
                          stderr=PIPE,
                          universal_newlines=True)
                try:
                    outs, errs = p.communicate(timeout=self.timeout)
                except TimeoutExpired:
                    p.kill()
                    raise StoqPluginException(
                        'Timed out decompressing payload')
                if p.returncode == 0:
                    break

            # Collect every file the archiver produced
            for root, dirs, files in os.walk(archive_outdir):
                for f in files:
                    path = os.path.join(extract_dir, root, f)
                    if os.path.getsize(path) > self.maximum_size:
                        errors.append(
                            f'Extracted object is too large ({os.path.getsize(path)} > {self.maximum_size})'
                        )
                        continue
                    with open(path, "rb") as extracted_file:
                        meta = PayloadMeta(extra_data={'filename': f})
                        try:
                            data = extracted_file.read()
                        except OSError as err:
                            errors.append(
                                f'Unable to access extracted content: {err}')
                            continue
                        extracted.append(ExtractedPayload(data, meta))
        return WorkerResponse(results, errors=errors, extracted=extracted)
示例#11
0
    def scan(self, payload: Payload,
             request_meta: RequestMeta) -> WorkerResponse:
        """
        Parse an SMTP session: collect headers, bodies, IOC content, and
        attachments from the message.
        """
        message_json: Dict[str, str] = {}
        attachments: List[ExtractedPayload] = []
        errors: List[str] = []
        ioc_content: str = ''
        session = UnicodeDammit(payload.content).unicode_markup
        message = Parser(policy=policy.default).parsestr(session)

        # Fold the SMTP headers into a dict, joining duplicate keys with
        # newlines
        for header, value in message.items():
            key = header.lower()
            if key in message_json:
                message_json[key] = f'{message_json[key]}\n{value}'
            else:
                message_json[key] = value

        if not self.omit_body:
            message_json['body'] = self._get_body(message, 'plain')
            message_json['body_html'] = self._get_body(message, 'html')

        if self.extract_iocs:
            for k in self.ioc_keys:
                if k in message_json:
                    ioc_content += f'\n{message_json[k]}'
                elif k == 'body':
                    plain_body = self._get_body(message, 'plain')
                    if plain_body:
                        ioc_content += plain_body
                elif k == 'body_html':
                    html_body = self._get_body(message, 'html')
                    if html_body:
                        ioc_content += html_body

        def build_meta(part, dispatch_to):
            # Metadata shared by both attachment branches below
            return PayloadMeta(
                should_archive=self.archive_attachments,
                extra_data={
                    'charset': part.get_content_charset(),
                    'content-description': part.get('Content-Description'),
                    'disposition': part.get_content_disposition(),
                    'filename': part.get_filename(),
                    'type': part.get_content_type(),
                },
                dispatch_to=dispatch_to,
            )

        for mailpart in message.iter_attachments():
            if mailpart.get_content_type() == 'message/rfc822':
                # Attached e-mails are unpacked and re-dispatched to smtp
                for part in mailpart.get_payload():
                    try:
                        attachment_meta = build_meta(part, ['smtp'])
                        attachments.append(
                            ExtractedPayload(part.as_bytes(), attachment_meta))
                    except Exception as err:
                        errors.append(f'Failed rfc822 attachment: {err}')
            else:
                try:
                    attachment_meta = build_meta(mailpart, self.always_dispatch)
                    attachments.append(
                        ExtractedPayload(mailpart.get_content(),
                                         attachment_meta))
                except Exception as err:
                    errors.append(f'Failed extracting attachment: {err}')

        if self.extract_iocs:
            # Hand the accumulated IOC text to the iocextract plugin
            ioc_meta = PayloadMeta(should_archive=False,
                                   dispatch_to=['iocextract'])
            attachments.append(ExtractedPayload(ioc_content.encode(),
                                                ioc_meta))
        return WorkerResponse(message_json,
                              errors=errors,
                              extracted=attachments)
示例#12
0
    def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
        """
        Parse an e-mail session with pyzmail: collect headers, bodies,
        IOC content, and attachments.
        """
        message_json = {}
        attachments = []
        errors = []
        ioc_content = ''
        email_session = UnicodeDammit(payload.content).unicode_markup
        message = pyzmail.message_from_string(email_session)

        # Fold the SMTP headers into a dict, joining duplicate keys with
        # newlines
        for header in message.keys():
            key = header.lower()
            decoded = message.get_decoded_header(header)
            if key in message_json:
                message_json[key] = f'{message_json[key]}\n{decoded}'
            else:
                message_json[key] = decoded

        if not self.omit_body:
            # Extract the e-mail body, to include HTML if available
            text_part = message.text_part
            html_part = message.html_part
            message_json['body'] = (
                UnicodeDammit(text_part.get_payload()).unicode_markup
                if text_part is not None
                else ''
            )
            message_json['body_html'] = (
                UnicodeDammit(html_part.get_payload()).unicode_markup
                if html_part is not None
                else ''
            )

        if self.extract_iocs:
            for k in self.ioc_keys:
                if k in message_json:
                    ioc_content += f'{message_json[k]}\n'

        # Handle attachments
        for mailpart in message.mailparts:
            if mailpart.is_body:
                # Body parts are not attachments; optionally harvest IOCs
                if self.extract_iocs:
                    ioc_content += UnicodeDammit(
                        mailpart.get_payload()).unicode_markup
            elif mailpart.type.startswith('message/'):
                # Attached e-mails are unpacked and re-dispatched to smtp
                for part in mailpart.part.get_payload():
                    try:
                        nested_meta = PayloadMeta(
                            should_archive=self.archive_attachments,
                            extra_data={'attached_msg': True},
                            dispatch_to=['smtp'],
                        )
                        attachments.append(
                            ExtractedPayload(part.as_bytes(), nested_meta))
                    except Exception as err:
                        errors.append(f'Failed extracting attachment: {err}')
            else:
                try:
                    att_filename = mailpart.filename or mailpart.sanitized_filename
                    part_meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data={
                            'charset': mailpart.charset,
                            'content-description': mailpart.part.get(
                                'Content-Description'),
                            'content-id': mailpart.content_id,
                            'disposition': mailpart.disposition,
                            'filename': att_filename,
                            'type': mailpart.type,
                        },
                        dispatch_to=self.always_dispatch,
                    )
                    attachments.append(
                        ExtractedPayload(mailpart.get_payload(), part_meta))
                except Exception as err:
                    errors.append(f'Failed extracting attachment: {err}')

        if self.extract_iocs:
            # Hand the accumulated IOC text to the iocextract plugin
            ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
            attachments.append(ExtractedPayload(ioc_content.encode(), ioc_meta))

        return WorkerResponse(message_json, errors=errors, extracted=attachments)
示例#13
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """Parse an SMTP/EML payload into headers, bodies, attachments, and IOC text.

        Headers are folded into a lower-cased dict (repeats newline-joined),
        plain/html bodies are added unless ``omit_body`` is set, every MIME
        attachment becomes an ``ExtractedPayload`` (attached rfc822 messages
        are re-dispatched to this plugin), and, when ``extract_iocs`` is on,
        the aggregated header/body text is handed to the iocextract plugin.
        """
        message_json: Dict[str, str] = {}
        attachments: List[ExtractedPayload] = []
        errors: List[Error] = []
        ioc_content: str = ''

        decoded = UnicodeDammit(payload.content).unicode_markup
        msg = Parser(policy=policy.default).parsestr(decoded)

        # Work around https://bugs.python.org/issue30681: an unparsable Date
        # header raises TypeError on access, so reformat it via dateutil.
        try:
            msg.get('Date')
        except TypeError:
            raw_date = [v for k, v in msg._headers if k == 'Date'][0]
            msg.replace_header('Date', dtparse(raw_date).strftime('%c %z'))

        # Fold all headers into a dict; duplicate headers are newline-joined.
        for name, value in msg.items():
            key = name.lower()
            if key in message_json:
                message_json[key] = f'{message_json[key]}\n{value}'
            else:
                message_json[key] = value

        if not self.omit_body:
            message_json['body'] = self._get_body(msg, 'plain')
            message_json['body_html'] = self._get_body(msg, 'html')

        if self.extract_iocs:
            # Collect the configured keys' text; body/body_html may need to be
            # extracted on demand when omit_body kept them out of message_json.
            for key in self.ioc_keys:
                if key in message_json:
                    ioc_content += f'\n{message_json[key]}'
                elif key in ('body', 'body_html'):
                    body_text = self._get_body(
                        msg, 'plain' if key == 'body' else 'html'
                    )
                    if body_text:
                        ioc_content += body_text

        for part in msg.iter_attachments():
            if part.get_content_type() == 'message/rfc822':
                # Attached email: each sub-part becomes its own payload and is
                # dispatched back to this plugin for recursive parsing.
                for sub in part.get_payload():
                    try:
                        meta = PayloadMeta(
                            should_archive=self.archive_attachments,
                            extra_data={
                                'charset': sub.get_content_charset(),
                                'content-description': sub.get(
                                    'Content-Description'
                                ),
                                'disposition': sub.get_content_disposition(),
                                'filename': sub.get_filename(),
                                'type': sub.get_content_type(),
                            },
                            dispatch_to=['smtp'],
                        )
                        attachments.append(
                            ExtractedPayload(sub.as_bytes(), meta)
                        )
                    except Exception as err:
                        errors.append(
                            Error(
                                error=f'Failed rfc822 attachment: {err}',
                                plugin_name=self.plugin_name,
                                payload_id=payload.results.payload_id,
                            )
                        )
            else:
                try:
                    meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data={
                            'charset': part.get_content_charset(),
                            'content-description': part.get(
                                'Content-Description'
                            ),
                            'disposition': part.get_content_disposition(),
                            'filename': part.get_filename(),
                            'type': part.get_content_type(),
                        },
                        dispatch_to=self.always_dispatch,
                    )
                    attachments.append(
                        ExtractedPayload(part.get_content(), meta)
                    )
                except Exception as err:
                    errors.append(
                        Error(
                            error=f'Failed extracting attachment: {err}',
                            plugin_name=self.plugin_name,
                            payload_id=payload.results.payload_id,
                        )
                    )

        if self.extract_iocs:
            # Ship the accumulated text to iocextract as a non-archived payload.
            ioc_meta = PayloadMeta(
                should_archive=False, dispatch_to=['iocextract']
            )
            attachments.append(ExtractedPayload(ioc_content.encode(), ioc_meta))

        return WorkerResponse(
            message_json, errors=errors, extracted=attachments
        )