Example #1
def _extract_parts(namespace_id, folder_id, body_string):
    data_sha256 = sha256(body_string).hexdigest()

    if not is_in_blockstore(data_sha256):
        save_to_blockstore(data_sha256, body_string)

    try:
        parsed = mime.from_string(body_string)
    except (mime.DecodingError, AttributeError, RuntimeError, TypeError) as e:
        log.error('Error parsing message metadata',
                  folder_id=folder_id,
                  namespace_id=namespace_id,
                  error=e)
        return

    if parsed is None:
        return

    for mimepart in parsed.walk(with_self=parsed.content_type.is_singlepart()):
        try:
            if mimepart.content_type.is_multipart():
                continue  # TODO should we store relations?
            _parse_mimepart(namespace_id, mimepart)
        except (mime.DecodingError, AttributeError, RuntimeError, TypeError,
                binascii.Error, UnicodeDecodeError) as e:
            log.error('Error parsing message MIME parts',
                      folder_id=folder_id,
                      namespace_id=namespace_id,
                      exc_info=True)
            return
Example #2
def _extract_parts(namespace_id, folder_id, body_string):
    data_sha256 = sha256(body_string).hexdigest()

    if not is_in_blockstore(data_sha256):
        save_to_blockstore(data_sha256, body_string)

    try:
        parsed = mime.from_string(body_string)
    except (mime.DecodingError, AttributeError, RuntimeError,
            TypeError) as e:
        log.error('Error parsing message metadata',
                  folder_id=folder_id, namespace_id=namespace_id, error=e)
        return

    if parsed is None:
        return

    for mimepart in parsed.walk(
            with_self=parsed.content_type.is_singlepart()):
        try:
            if mimepart.content_type.is_multipart():
                continue  # TODO should we store relations?
            _parse_mimepart(namespace_id, mimepart)
        except (mime.DecodingError, AttributeError, RuntimeError,
                TypeError, binascii.Error, UnicodeDecodeError) as e:
            log.error('Error parsing message MIME parts',
                      folder_id=folder_id, namespace_id=namespace_id,
                      exc_info=True)
            return
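
Both variants of _extract_parts above lean on a small content-addressed blockstore API (is_in_blockstore, save_to_blockstore, and, in later examples, get_from_blockstore) keyed by the SHA-256 hex digest of the raw bytes, plus a mime.from_string parser. The real helpers persist to disk or S3; as a rough, dict-backed stand-in for local experimentation (ours, not the project's), they might look like this:

from hashlib import sha256

# In-memory stand-in for the blockstore helpers used throughout these
# examples. Illustrative only: the production versions persist to disk or S3.
_FAKE_BLOCKSTORE = {}


def is_in_blockstore(data_sha256):
    # Content-addressed lookup: the key is the SHA-256 hex digest of the blob.
    return data_sha256 in _FAKE_BLOCKSTORE


def save_to_blockstore(data_sha256, data):
    # Idempotent by construction: identical blobs hash to the same key and
    # are stored only once.
    _FAKE_BLOCKSTORE[data_sha256] = data


def get_from_blockstore(data_sha256):
    # Returns None on a miss, which is exactly what the callers check for.
    return _FAKE_BLOCKSTORE.get(data_sha256)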
Example #3
def _save_attachment(data):
    if len(data) == 0:
        log.warning('Not saving 0-length data blob')
        return

    if isinstance(data, unicode):
        data = data.encode('utf-8', 'strict')

    data_sha256 = sha256(data).hexdigest()
    save_to_blockstore(data_sha256, data)
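
_save_attachment normalizes unicode input to UTF-8 bytes before hashing, so the recorded hash always matches the bytes that actually get stored, and it skips zero-length blobs. A quick illustrative call, assuming the dict-backed stand-ins sketched earlier (Python 2 semantics, since the snippet checks for unicode):

# Illustrative only; the payload is ours, the stand-in blockstore is from the
# sketch above.
payload = u'r\xe9sum\xe9 contents'
_save_attachment(payload)  # encoded to UTF-8 before hashing and storing
assert is_in_blockstore(sha256(payload.encode('utf-8')).hexdigest())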
Example #4
    def data(self):
        if self.size == 0:
            log.warning('Block size is 0')
            return ''
        elif hasattr(self, '_data'):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning(
                "Couldn't find data on S3 for block with hash {}".format(
                    self.data_sha256))

            from inbox.models.block import Block
            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # accidentally deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    raw_mime = get_from_blockstore(message.data_sha256)

                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}".format(
                            message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(
                                with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            if isinstance(data, unicode):
                                data = data.encode('utf-8', 'strict')

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info('Found subpart with hash {}'.format(
                                    self.data_sha256))
                                save_to_blockstore(self.data_sha256, data)
                                return data

            log.error('No data returned!')
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), \
            "Returned data doesn't match stored hash!"
        return value
Example #5
    def data(self):
        if self.size == 0:
            log.warning("Block size is 0")
            return ""
        elif hasattr(self, "_data"):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning("Couldn't find data on S3 for block with hash {}".format(self.data_sha256))

            from inbox.models.block import Block

            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # accidentally deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    raw_mime = get_from_blockstore(message.data_sha256)

                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}".format(message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            if isinstance(data, unicode):
                                data = data.encode("utf-8", "strict")

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info("Found subpart with hash {}".format(self.data_sha256))
                                save_to_blockstore(self.data_sha256, data)
                                return data

            log.error("No data returned!")
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), "Returned data doesn't match stored hash!"
        return value
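
Both getter variants above implement the same recovery path: when the block's bytes are missing from the blockstore, re-parse the raw message that references this block and look for a MIME part whose body hashes to the block's data_sha256. Distilled into a standalone helper (the function name is ours, and it assumes the mime module used above is flanker's, which matches the from_string/walk/content_type calls), the core step looks roughly like this:

from hashlib import sha256

from flanker import mime  # assumption: the `mime` used above is flanker's


def recover_part_by_hash(raw_mime, target_sha256):
    # Re-parse the raw message and return the body of the part whose
    # SHA-256 matches the missing block, or None if nothing matches.
    parsed = mime.from_string(raw_mime)
    if parsed is None:
        return None
    for mimepart in parsed.walk(with_self=parsed.content_type.is_singlepart()):
        if mimepart.content_type.is_multipart():
            continue
        data = mimepart.body
        if data is None:
            continue
        if isinstance(data, unicode):
            # Python 2: hash the same UTF-8 bytes that were originally stored.
            data = data.encode('utf-8', 'strict')
        if sha256(data).hexdigest() == target_sha256:
            return data
    return None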
Example #6
    def data(self, value):
        assert value is not None
        assert type(value) is not unicode

        # Cache value in memory. Otherwise message-parsing incurs a disk or S3
        # roundtrip.
        self._data = value
        self.size = len(value)
        self.data_sha256 = sha256(value).hexdigest()
        assert self.data_sha256

        if len(value) == 0:
            log.warning('Not saving 0-length data blob')
            return

        blockstore.save_to_blockstore(self.data_sha256, value)
Example #7
    def data(self, value):
        assert value is not None
        assert isinstance(value, bytes)

        # Cache value in memory. Otherwise message-parsing incurs a disk or S3
        # roundtrip.
        self._data = value
        self.size = len(value)
        self.data_sha256 = sha256(value).hexdigest()
        assert self.data_sha256

        if len(value) == 0:
            log.warning("Not saving 0-length data blob")
            return

        if STORE_MESSAGE_ATTACHMENTS:
            blockstore.save_to_blockstore(self.data_sha256, value)
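
The setter variants above establish the invariant that the getters rely on and assert: data_sha256 is always the SHA-256 hex digest of the exact bytes that were stored, and size is their length (the last variant additionally gates the blockstore write behind a STORE_MESSAGE_ATTACHMENTS config flag). A minimal sketch, ours rather than the project's, of how such a getter/setter pair hangs together as a property on a model-like class, using the dict-backed stand-ins from earlier:

from hashlib import sha256


class FakeBlock(object):
    # Minimal illustration only: the real model adds database columns,
    # logging, and the S3 recovery path shown in the getter examples.
    def _get_data(self):
        if getattr(self, '_data', None) is not None:
            # On initial download the bytes are cached in memory.
            return self._data
        return get_from_blockstore(self.data_sha256)  # dict-backed stand-in

    def _set_data(self, value):
        assert value is not None and isinstance(value, bytes)
        self._data = value
        self.size = len(value)
        self.data_sha256 = sha256(value).hexdigest()
        save_to_blockstore(self.data_sha256, value)   # dict-backed stand-in

    data = property(_get_data, _set_data)


blk = FakeBlock()
blk.data = b'example attachment bytes'
assert blk.data_sha256 == sha256(blk.data).hexdigest()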
Example #8
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """
        Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; that must be done separately.

        Parameters
        ----------
        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        body_string : str
            The full raw message including headers (encoded).

        """
        _rqd = [account, mid, folder_name, body_string]
        if not all([v is not None for v in _rqd]):
            raise ValueError(
                'Required keyword arguments: account, mid, folder_name, '
                'body_string')
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        msg.data_sha256 = sha256(body_string).hexdigest()

        # Persist the raw MIME message to disk / S3
        save_to_blockstore(msg.data_sha256, body_string)

        # Persist the processed message to the database
        msg.namespace_id = account.namespace.id

        try:
            parsed = mime.from_string(body_string)
            # Non-persisted instance attribute used by EAS.
            msg.parsed_body = parsed
            msg._parse_metadata(parsed, body_string, received_date, account.id,
                                folder_name, mid)
        except (mime.DecodingError, AttributeError, RuntimeError,
                TypeError) as e:
            parsed = None
            # Non-persisted instance attribute used by EAS.
            msg.parsed_body = ''
            log.error('Error parsing message metadata',
                      folder_name=folder_name, account_id=account.id, error=e)
            msg._mark_error()

        if parsed is not None:
            plain_parts = []
            html_parts = []
            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                try:
                    if mimepart.content_type.is_multipart():
                        continue  # TODO should we store relations?
                    msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                        html_parts, plain_parts)
                except (mime.DecodingError, AttributeError, RuntimeError,
                        TypeError, binascii.Error, UnicodeDecodeError) as e:
                    log.error('Error parsing message MIME parts',
                              folder_name=folder_name, account_id=account.id,
                              error=e)
                    msg._mark_error()
            msg.calculate_body(html_parts, plain_parts)

            # Occasionally people try to send messages to way too many
            # recipients. In such cases, empty the field and treat as a parsing
            # error so that we don't break the entire sync.
            for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references',
                          'reply_to'):
                value = getattr(msg, field)
                if json_field_too_long(value):
                    log.error('Recipient field too long', field=field,
                              account_id=account.id, folder_name=folder_name,
                              mid=mid)
                    setattr(msg, field, [])
                    msg._mark_error()

        return msg
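
create_from_synced ties the earlier pieces together: hash and persist the raw MIME bytes, parse the metadata, walk the MIME parts, and mark (rather than raise on) per-part failures so one malformed message cannot break an entire folder sync. An illustrative call only; the account, message id, and raw bytes below are placeholders for whatever the sync engine supplies:

from datetime import datetime

# All values are placeholders; body_string must be raw bytes, not unicode,
# or the assertion at the top of the method fires.
msg = Message.create_from_synced(
    account=synced_account,            # an account with a namespace attached
    mid=42,                            # backend-specific id, only used in logs
    folder_name='INBOX',
    received_date=datetime.utcnow(),
    body_string=raw_rfc2822_bytes,     # the full message, headers included
)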
Example #9
    def data(self):
        if self.size == 0:
            log.warning('Block size is 0')
            return ''
        elif hasattr(self, '_data'):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = blockstore.get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning("Couldn't find data on S3 for block",
                        sha_hash=self.data_sha256)

            from inbox.models.block import Block
            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    account = message.namespace.account

                    statsd_string = 'api.direct_fetching.{}.{}'.format(
                        account.provider, account.id)

                    # Try to fetch the message from S3 first.
                    with statsd_client.timer('{}.blockstore_latency'.format(
                                             statsd_string)):
                        raw_mime = blockstore.get_from_blockstore(message.data_sha256)

                    # If it's not there, get it from the provider.
                    if raw_mime is None:
                        statsd_client.incr('{}.cache_misses'.format(statsd_string))

                        with statsd_client.timer('{}.provider_latency'.format(
                                                 statsd_string)):
                            raw_mime = get_raw_from_provider(message)

                        # The provider may also come up empty; only hash and
                        # cache the raw message if it actually returned bytes.
                        if raw_mime is not None:
                            msg_sha256 = sha256(raw_mime).hexdigest()

                            # Cache the raw message in the blockstore so that
                            # we don't have to fetch it over and over.
                            with statsd_client.timer('{}.blockstore_save_latency'.format(
                                                     statsd_string)):
                                blockstore.save_to_blockstore(msg_sha256, raw_mime)
                    else:
                        # We found it in the blockstore --- report this.
                        statsd_client.incr('{}.cache_hits'.format(statsd_string))

                    # If we couldn't find it there, give up.
                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}"
                                  .format(message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(
                                with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            if isinstance(data, unicode):
                                data = data.encode('utf-8', 'strict')

                            if data is None:
                                continue

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info('Found subpart with hash {}'.format(
                                    self.data_sha256))

                                with statsd_client.timer('{}.blockstore_save_latency'.format(
                                                         statsd_string)):
                                    blockstore.save_to_blockstore(self.data_sha256, data)
                                    return data
                    log.error("Couldn't find the attachment in the raw message", message_id=message.id)

            log.error('No data returned!')
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), \
            "Returned data doesn't match stored hash!"
        return value
Example #10
    def data(self):
        if self.size == 0:
            log.warning('Block size is 0')
            return ''
        elif hasattr(self, '_data'):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = blockstore.get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning("Couldn't find data on S3 for block",
                        sha_hash=self.data_sha256)

            from inbox.models.block import Block
            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    account = message.namespace.account

                    statsd_string = 'api.direct_fetching.{}.{}'.format(
                        account.provider, account.id)

                    # Try to fetch the message from S3 first.
                    with statsd_client.timer(
                            '{}.blockstore_latency'.format(statsd_string)):
                        raw_mime = blockstore.get_from_blockstore(
                            message.data_sha256)

                    # If it's not there, get it from the provider.
                    if raw_mime is None:
                        statsd_client.incr(
                            '{}.cache_misses'.format(statsd_string))

                        with statsd_client.timer(
                                '{}.provider_latency'.format(statsd_string)):
                            raw_mime = get_raw_from_provider(message)

                        # The provider may also come up empty; only hash and
                        # cache the raw message if it actually returned bytes.
                        if raw_mime is not None:
                            msg_sha256 = sha256(raw_mime).hexdigest()

                            # Cache the raw message in the blockstore so that
                            # we don't have to fetch it over and over.
                            with statsd_client.timer(
                                    '{}.blockstore_save_latency'.format(
                                        statsd_string)):
                                blockstore.save_to_blockstore(
                                    msg_sha256, raw_mime)
                    else:
                        # We found it in the blockstore --- report this.
                        statsd_client.incr(
                            '{}.cache_hits'.format(statsd_string))

                    # If we couldn't find it there, give up.
                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}".format(
                            message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(
                                with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            if isinstance(data, unicode):
                                data = data.encode('utf-8', 'strict')

                            if data is None:
                                continue

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info('Found subpart with hash {}'.format(
                                    self.data_sha256))

                                with statsd_client.timer(
                                        '{}.blockstore_save_latency'.format(
                                            statsd_string)):
                                    blockstore.save_to_blockstore(
                                        self.data_sha256, data)
                                    return data
                    log.error(
                        "Couldn't find the attachment in the raw message",
                        message_id=message.id)

            log.error('No data returned!')
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), \
            "Returned data doesn't match stored hash!"
        return value
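
The last two getters extend the recovery path with a provider fallback and statsd instrumentation: serve the raw message from the blockstore when possible (a cache hit), re-download it from the provider on a miss, and cache whatever comes back so the next read is cheap. Stripped of the metrics and model plumbing, the fetch pattern reduces to roughly the following sketch (the function name and bare helpers are ours; get_raw_from_provider is the call used in the examples):

from hashlib import sha256


def fetch_raw_mime(message):
    # Cheap path first: the content-addressed blockstore (disk or S3).
    raw_mime = get_from_blockstore(message.data_sha256)
    if raw_mime is not None:
        return raw_mime                          # cache hit

    # Slow path: re-download the raw message from the mail provider.
    raw_mime = get_raw_from_provider(message)
    if raw_mime is not None:
        # Cache under the hash of what was actually fetched so subsequent
        # reads are served from the blockstore.
        save_to_blockstore(sha256(raw_mime).hexdigest(), raw_mime)
    return raw_mime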