Пример #1
0
def _decode_attachment_filename(filename):
    """Decode RFC 2047 encoded-words in an attachment filename.

    Returns ``filename`` unchanged when it contains no encoded-words.
    Otherwise every chunk reported by ``email.header.decode_header`` is
    decoded and the pieces are joined, so names that mix charsets or end in
    an unencoded suffix (e.g. ``.pdf``) are not truncated to the first chunk.
    """
    chunks = email.header.decode_header(filename)
    charsets = [charset for _, charset in chunks if charset]
    if not charsets:
        return filename

    print(
        "\t{filename} encoded with {encoding}, converting to unicode"
        .format(filename=filename, encoding=", ".join(charsets)))
    decoded = []
    for text, charset in chunks:
        if isinstance(text, bytes):
            try:
                text = text.decode(charset or "ascii", "replace")
            except LookupError:
                # Unknown/invalid charset label: keep what is representable
                # rather than losing the whole attachment.
                text = text.decode("ascii", "replace")
        decoded.append(text)
    return "".join(decoded)


def parse_attachment(message_part, state, attachments=None):
    """Extract an attachment and its metadata from one message part.

    A part is treated as an attachment when its Content-Disposition header
    has the (case-insensitive) disposition type ``attachment``.

    :param message_part: message object providing ``get_params``,
        ``get_payload``, ``get_content_type``, ``get_content_charset`` and
        ``get_filename``.
    :param state: object with ``sourceFileUUID`` and ``sourceFilePath``
        (used in diagnostics) and an ``error_count`` that is incremented
        when parsing fails.
    :param attachments: passed through unchanged to ``parse2`` when the
        part turns out to contain nested parts.
    :returns: a ``StringIO`` holding the decoded payload, with the extra
        attributes ``content_type``, ``size``, ``create_date``, ``mod_date``,
        ``read_date`` and ``name``; or ``None`` when the part is not an
        attachment, when it only contains nested parts (those are handed to
        ``parse2``), or when parsing failed.
    """
    params = message_part.get_params(None, "Content-Disposition")
    if not params:
        return None

    # If a 'part' has a Content-Disposition, we assume it may be an attachment
    try:
        params = dict(params)
        print("\tContent-Disposition (for following email)", params)
        # Disposition types are case-insensitive (RFC 2183), but the bare
        # disposition token keeps the case used in the message, so look
        # things up through a lower-cased copy. ``params`` itself is left
        # untouched so the diagnostics show the header as received.
        lookup = dict((key.lower(), value) for key, value in params.items())
        if "attachment" not in lookup:
            return None

        # get_payload(decode=True) returns None for multipart payloads; see
        # http://docs.python.org/2/library/email.message.html#email.message.Message.get_payload
        file_data = message_part.get_payload(decode=True)
        if not file_data:
            payload = message_part.get_payload()
            if isinstance(payload, list):
                for msgobj in payload:
                    # TODO not sure this actually does anything
                    parse2(msgobj, state, attachments)
                return None
            print(payload, file=sys.stderr)
            print(message_part.get_content_charset(), file=sys.stderr)
            if file_data is None:
                # Previously this surfaced as an accidental TypeError from
                # len(None); report it explicitly through the same handler.
                raise ValueError("attachment part has no decodable payload")

        # NOTE(review): StringIO comes from the file's imports and must
        # accept whatever get_payload(decode=True) returns (bytes on
        # Python 3) -- confirm which StringIO is imported.
        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        # A "size" taken from the header is a string; the fallback is an int.
        attachment.size = lookup.get("size", len(file_data))
        attachment.create_date = lookup.get("create-date")
        attachment.mod_date = lookup.get("modification-date")
        attachment.read_date = lookup.get("read-date")
        # TODO convert dates to datetime

        filename = message_part.get_filename(None)
        if filename:
            # Filenames may be encoded with =?encoding?...
            # If so, convert to unicode
            filename = _decode_attachment_filename(filename)
        else:  # filename not in Content-Disposition
            print(
                """Warning, no filename found in: [{%s}%s] Content-Disposition: %s or Content-Type"""
                % (state.sourceFileUUID, state.sourceFilePath, params),
                file=sys.stderr,
            )
            filename = six.text_type(uuid.uuid4())
            print(
                "Attempting extraction with random filename: %s" %
                (filename),
                file=sys.stderr,
            )
        # Remove newlines from filename because that breaks everything
        # NOTE(review): only CR/LF are stripped; if callers write this name
        # to disk, path separators probably need sanitizing too -- verify.
        filename = filename.replace("\r", "").replace("\n", "")

        attachment.name = filename
        return attachment

    except Exception as inst:
        # Best-effort extraction: log, count the failure and carry on.
        print(type(inst), file=sys.stderr)
        print(inst.args, file=sys.stderr)
        print(
            "Error parsing: file: {%s}%s" %
            (state.sourceFileUUID, state.sourceFilePath),
            file=sys.stderr,
        )
        print("Error parsing: Content-Disposition: ",
              params,
              file=sys.stderr)
        print(file=sys.stderr)
        state.error_count += 1
    return None