Пример #1
0
def noop_event_update(event, data):
    """
    Return True if applying `data` to `event` would leave it unchanged.

    A scratch Event is populated from `event`, the API-modifiable fields
    present in `data` are written onto it, and the encoded forms of both
    events are compared. Comparing encoded forms is less cumbersome than
    reasoning about the multiple values of the `when` field.
    """
    candidate = Event()
    candidate.update(event)
    candidate.namespace = event.namespace

    for field in Event.API_MODIFIABLE_FIELDS:
        if field in data:
            setattr(candidate, field, data[field])

    encoded_current = encode(event)
    encoded_candidate = encode(candidate)

    # Participants are skipped here and compared separately below: the list
    # can be permuted, so comparing it directly would be too strict.
    for field in Event.API_MODIFIABLE_FIELDS:
        if field == 'participants':
            continue
        if encoded_current.get(field) != encoded_candidate.get(field):
            return False

    candidate_by_email = {p['email']: p for p in candidate.participants}
    current_by_email = {p['email']: p for p in event.participants}
    if len(candidate_by_email) != len(current_by_email):
        return False

    for email, updated in candidate_by_email.items():
        if email not in current_by_email:
            return False

        existing = current_by_email[email]
        if updated.get('status') != existing.get('status'):
            return False
        if updated.get('comment') != existing.get('comment'):
            return False

    return True
Пример #2
0
def noop_event_update(event, data):
    """
    Tell whether an update payload is a no-op for the given event.

    The event is cloned, the modifiable fields found in `data` are set on
    the clone, and the encoded clone is compared with the encoded original.
    Returns False as soon as a difference is found, True otherwise.
    """
    clone = Event()
    clone.update(event)
    clone.namespace = event.namespace

    modifiable = Event.API_MODIFIABLE_FIELDS
    for name in modifiable:
        if name in data:
            setattr(clone, name, data[name])

    before = encode(event)
    after = encode(clone)

    # 'participants' is a list which can be permuted, so it is left out of
    # the field-by-field comparison and handled on its own afterwards.
    scalar_fields = [name for name in modifiable if name != 'participants']
    if any(before.get(name) != after.get(name) for name in scalar_fields):
        return False

    clone_participants = {p['email']: p for p in clone.participants}
    event_participants = {p['email']: p for p in event.participants}
    if len(clone_participants) != len(event_participants):
        return False

    for email in clone_participants:
        if email not in event_participants:
            return False

        new_entry = clone_participants[email]
        old_entry = event_participants[email]
        # Only the status and the comment of a participant are compared.
        for key in ('status', 'comment'):
            if new_entry.get(key) != old_entry.get(key):
                return False

    return True
Пример #3
0
def index_messages(namespace_id, namespace_public_id, created_before=None):
    """
    Bulk-index the messages of a namespace.

    `created_before`, when given, is parsed with dateutil and restricts
    indexing to messages whose `created_at` is at or before it. Returns
    the count accumulated from the bulk index call.
    """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)

    with session_scope() as db_session:
        message_query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)
        if created_before is not None:
            message_query = message_query.filter(
                Message.created_at <= created_before)

        parts_loading = joinedload(Message.parts).load_only(
            'content_disposition')
        message_query = message_query.options(parts_loading)

        # Every message is encoded, post-processed and queued as an
        # 'index' operation while the session is still open.
        operations = [
            ('index',
             _process_attributes(
                 encode(message, namespace_public_id=namespace_public_id)))
            for message in safer_yield_per(message_query, Message.id, 0,
                                           CHUNK_SIZE)
        ]

    log.info('Going to index messages', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id)

    indexed_count += search_engine.messages.bulk_index(operations)

    log.info('Indexed messages', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
Пример #4
0
def send_draft_copy(account, draft, custom_body, recipient):
    """
    Send `draft` to a single recipient with `custom_body` substituted for
    the draft's own body, without marking the draft as sent.

    Multi-send uses this to deliver individually customized bodies.
    Returns the JSON response for the draft (carrying the custom body) on
    success, or the error response built by `err` if sending fails.
    """
    # Build the success payload up front from the serialized draft. Only the
    # payload's body is swapped for the custom one; the draft keeps its
    # existing body, which is still needed when it is saved to the sent
    # folder.
    payload = encode(draft)
    payload["body"] = custom_body
    success_response = APIEncoder().jsonify(payload)

    try:
        # send_custom writes custom_body into the outgoing message in place
        # of the body stored on the draft.
        client = get_sendmail_client(account)
        client.send_custom(draft, custom_body, [recipient])
    except SendMailException as exc:
        error_details = {}
        if exc.failures:
            error_details["failures"] = exc.failures
        if exc.server_error:
            error_details["server_error"] = exc.server_error
        return err(exc.http_code, exc.message, **error_details)

    return success_response
Пример #5
0
def index_messages(namespace, updated_since=None):
    """
    Index the messages of a namespace.

    Arguments
    ---------
    namespace: pair
        (namespace_id, namespace_public_id); anything that unpacks into
        exactly those two values.
    updated_since: str, optional
        Date string parsed with dateutil. When given, only messages whose
        `updated_at` is later than it are indexed.

    Returns the count accumulated from the bulk index call.
    """
    namespace_id, namespace_public_id = namespace

    if updated_since is not None:
        updated_since = dateutil.parser.parse(updated_since)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)

    with session_scope() as db_session:
        # Filter on the unpacked id. `namespace` is consumed as a pair
        # above, so it is not required to expose an `.id` attribute (a
        # plain tuple would raise AttributeError on `namespace.id`).
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)

        if updated_since is not None:
            query = query.filter(Message.updated_at > updated_since)

        query = query.options(joinedload(Message.parts).
                              load_only('content_disposition'))

        # Encode while the session is open; the encoded objects are handed
        # to the search engine after the session scope has exited.
        encoded = []
        for obj in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            encoded_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(encoded_obj)

    indexed_count += search_engine.messages.bulk_index(encoded)

    log.info('Indexed messages', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
Пример #6
0
def send_draft_copy(account, draft, custom_body, recipient):
    """
    Deliver a copy of `draft` to `recipient` using `custom_body` instead of
    the body stored on the draft; the draft is not marked as sent.

    Used within multi-send to send messages to individual recipients with
    customized bodies.
    """
    # The success response is the serialized draft with its body replaced
    # by the custom body the recipient receives. The draft object itself is
    # not modified here, so its own body stays available for when it is
    # saved to the sent folder.
    serialized = encode(draft)
    serialized['body'] = custom_body
    response = APIEncoder().jsonify(serialized)

    try:
        # send_custom puts the custom body into the message in place of the
        # one in the draft.
        get_sendmail_client(account).send_custom(
            draft, custom_body, [recipient])
    except SendMailException as exc:
        # Forward only the diagnostic fields that are actually set.
        extra = {}
        for name in ('failures', 'server_error'):
            value = getattr(exc, name)
            if value:
                extra[name] = value
        return err(exc.http_code, exc.message, **extra)
    else:
        return response
Пример #7
0
def index_messages(namespace, updated_since=None):
    """
    Bulk-index the messages of a namespace.

    `namespace` unpacks into (namespace_id, namespace_public_id).
    `updated_since`, when given, is parsed with dateutil and limits the
    run to messages whose `updated_at` is later than it. Returns the count
    accumulated from the bulk index call.
    """
    namespace_id, namespace_public_id = namespace

    if updated_since is not None:
        updated_since = dateutil.parser.parse(updated_since)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)

    with session_scope() as db_session:
        message_query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)
        if updated_since is not None:
            message_query = message_query.filter(
                Message.updated_at > updated_since)

        parts_loading = joinedload(Message.parts).load_only(
            'content_disposition')
        message_query = message_query.options(parts_loading)

        # Each message is encoded and queued as an 'index' operation.
        operations = [
            ('index', encode(message, namespace_public_id=namespace_public_id))
            for message in safer_yield_per(message_query, Message.id, 0,
                                           CHUNK_SIZE)
        ]

    indexed_count += search_engine.messages.bulk_index(operations)

    log.info('Indexed messages',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
Пример #8
0
def create_revision(obj, session, revision_type):
    """
    Add a Transaction describing an insert, update or delete of `obj` to
    `session`.

    Nothing is recorded when `obj` is not a HasRevisions instance, when it
    asks for transaction creation to be suppressed, or when an update
    carries no versioned changes. Non-delete revisions also store the
    encoded object as their snapshot.
    """
    from inbox.api.kellogs import encode
    assert revision_type in ('insert', 'update', 'delete')

    if not isinstance(obj, HasRevisions):
        return
    if obj.should_suppress_transaction_creation:
        return
    if revision_type == 'update' and not obj.has_versioned_changes():
        return

    transaction_fields = dict(
        command=revision_type,
        record_id=obj.id,
        object_type=obj.API_OBJECT_NAME,
        object_public_id=obj.public_id,
        namespace_id=obj.namespace.id)
    revision = Transaction(**transaction_fields)

    if revision_type != 'delete':
        revision.snapshot = encode(obj)
    session.add(revision)
Пример #9
0
def create_revision(obj, session, revision_type):
    """
    Record a change to `obj` as a Transaction in `session`.

    `revision_type` must be 'insert', 'update' or 'delete'. Objects that
    are not HasRevisions instances, objects that suppress transaction
    creation, and updates without versioned changes are ignored.
    """
    from inbox.api.kellogs import encode
    assert revision_type in ('insert', 'update', 'delete')

    skip = (not isinstance(obj, HasRevisions) or
            obj.should_suppress_transaction_creation)
    if skip:
        return

    is_update = revision_type == 'update'
    if is_update and not obj.has_versioned_changes():
        return

    revision = Transaction(
        command=revision_type,
        record_id=obj.id,
        object_type=obj.API_OBJECT_NAME,
        object_public_id=obj.public_id,
        namespace_id=obj.namespace.id)

    # Deletes carry no snapshot; every other revision stores the encoded
    # object.
    is_delete = revision_type == 'delete'
    if not is_delete:
        revision.snapshot = encode(obj)

    session.add(revision)
Пример #10
0
    def take_snapshot(self, obj):
        """Store the API representation of `obj` on this transaction as
        `public_snapshot`, plus extra properties wanted in the transaction
        log as `private_snapshot` (currently for messages only). Used for
        delta syncing and the ping API."""
        from inbox.api.kellogs import encode
        self.public_snapshot = encode(obj)

        from inbox.models.message import Message
        if not isinstance(obj, Message):
            return

        # hack: messages additionally record thread dates and the
        # filenames of their attachment parts.
        recentdate = obj.thread.recentdate
        subjectdate = obj.thread.subjectdate
        attachment_filenames = [
            part.block.filename for part in obj.parts if part.is_attachment]
        self.private_snapshot = {
            'recentdate': recentdate,
            'subjectdate': subjectdate,
            'filenames': attachment_filenames}
Пример #11
0
def index_threads(namespace_id, namespace_public_id, created_before=None):
    """
    Bulk-index the threads of a namespace in batches.

    `created_before`, when given, is parsed with dateutil and restricts
    indexing to threads whose `created_at` is at or before it. Returns the
    count accumulated over all bulk index calls.
    """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id,
                                          create_index=True)

    with session_scope() as db_session:
        thread_query = db_session.query(Thread).filter(
            Thread.namespace_id == namespace_id)
        if created_before is not None:
            thread_query = thread_query.filter(
                Thread.created_at <= created_before)

        message_columns = ('public_id', 'is_draft', 'from_addr', 'to_addr',
                           'cc_addr', 'bcc_addr')
        messages_loading = subqueryload(Thread.messages).load_only(
            *message_columns)
        tags_loading = subqueryload('tagitems').joinedload('tag').load_only(
            'public_id', 'name')
        thread_query = thread_query.options(messages_loading, tags_loading)

        batch = []
        for thread in safer_yield_per(thread_query, Thread.id, 0, CHUNK_SIZE):
            # A full batch is flushed before the next thread is queued.
            if len(batch) >= INDEX_CHUNK_SIZE:
                indexed_count += search_engine.threads.bulk_index(batch)
                batch = []

            batch.append(
                ('index',
                 encode(thread, namespace_public_id=namespace_public_id)))

        # Flush whatever is left over from the last partial batch.
        if batch:
            indexed_count += search_engine.threads.bulk_index(batch)

    log.info('Indexed threads',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             thread_count=indexed_count)

    return indexed_count
Пример #12
0
def index_threads(namespace_id, namespace_public_id, created_before=None):
    """
    Index a namespace's threads, sending them to the search engine in
    batches, and return the accumulated indexed count.

    When `created_before` is given it is parsed with dateutil, and only
    threads created at or before that time are considered.
    """
    cutoff_given = created_before is not None
    if cutoff_given:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(
        namespace_public_id, create_index=True)

    with session_scope() as db_session:
        criteria = [Thread.namespace_id == namespace_id]
        query = db_session.query(Thread).filter(*criteria)

        if cutoff_given:
            query = query.filter(Thread.created_at <= created_before)

        loading_options = (
            subqueryload(Thread.messages).load_only(
                'public_id', 'is_draft', 'from_addr', 'to_addr',
                'cc_addr', 'bcc_addr'),
            subqueryload('tagitems').joinedload('tag').load_only(
                'public_id', 'name'),
        )
        query = query.options(*loading_options)

        pending = []

        for thread in safer_yield_per(query, Thread.id, 0, CHUNK_SIZE):
            # Send the pending operations once the batch is full, then
            # start a new batch with the current thread.
            if len(pending) >= INDEX_CHUNK_SIZE:
                indexed_count += search_engine.threads.bulk_index(pending)
                pending = []

            document = encode(thread, namespace_public_id=namespace_public_id)
            pending.append(('index', document))

        if pending:
            indexed_count += search_engine.threads.bulk_index(pending)

    log.info('Indexed threads', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             thread_count=indexed_count)

    return indexed_count
Пример #13
0
    def index(self, transactions, db_session):
        """
        Apply a batch of transactions to the Elasticsearch indices.

        Every transaction becomes an (operation, api_repr) pair, grouped by
        namespace public id and object type. The message and thread groups
        of each namespace are then sent with one bulk_index call each.

        """
        ops_by_namespace = defaultdict(lambda: defaultdict(list))

        for trx in transactions:
            namespace_public_id = trx.namespace.public_id
            object_type = trx.object_type

            if trx.command == 'delete':
                entry = ('delete', {'id': trx.object_public_id})
            else:
                model = transaction_objects()[trx.object_type]
                record = db_session.query(model).get(trx.record_id)
                if record is None:
                    # The referenced row was not found; skip it.
                    continue
                entry = ('index',
                         encode(record,
                                namespace_public_id=namespace_public_id))

            ops_by_namespace[namespace_public_id][object_type].append(entry)

        self.log.info('namespaces to index count',
                      count=len(ops_by_namespace))

        for namespace_public_id, ops_by_type in ops_by_namespace.items():
            engine = NamespaceSearchEngine(namespace_public_id,
                                           create_index=True)

            # An empty group is reported as 0 without calling bulk_index.
            message_ops = ops_by_type['message']
            if message_ops:
                message_count = engine.messages.bulk_index(message_ops)
            else:
                message_count = 0

            thread_ops = ops_by_type['thread']
            if thread_ops:
                thread_count = engine.threads.bulk_index(thread_ops)
            else:
                thread_count = 0

            self.log.info('per-namespace index counts',
                          namespace_id=namespace_public_id,
                          message_count=message_count,
                          thread_count=thread_count)
Пример #14
0
def index_messages(namespace_id, namespace_public_id, created_before=None):
    """
    Bulk-index the messages of a namespace in batches.

    `created_before`, when given, is parsed with dateutil and restricts
    indexing to messages whose `created_at` is at or before it. Returns
    the count accumulated over all bulk index calls.
    """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id,
                                          create_index=True)

    with session_scope() as db_session:
        message_query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)
        if created_before is not None:
            message_query = message_query.filter(
                Message.created_at <= created_before)

        parts_loading = joinedload(Message.parts).load_only(
            'content_disposition')
        message_query = message_query.options(parts_loading)

        batch = []
        for message in safer_yield_per(message_query, Message.id, 0,
                                       CHUNK_SIZE):
            # A full batch is flushed before the next message is queued.
            if len(batch) >= INDEX_CHUNK_SIZE:
                indexed_count += search_engine.messages.bulk_index(batch)
                batch = []

            batch.append(
                ('index',
                 encode(message, namespace_public_id=namespace_public_id)))

        # Flush whatever is left over from the last partial batch.
        if batch:
            indexed_count += search_engine.messages.bulk_index(batch)

    log.info('Indexed messages',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
Пример #15
0
def index_namespace(namespace_public_id, updated_since=None):
    """
    Index the messages and threads of a namespace, one object at a time,
    and return how many objects were indexed.

    `updated_since`, when given, is parsed with dateutil and restricts
    indexing to objects whose `updated_at` is later than it.

    """
    if updated_since is not None:
        updated_since = dateutil.parser.parse(updated_since)

    total_indexed = 0
    # Each model is paired with the name of the search-engine attribute
    # used to index it. Messages are processed first, then threads, each
    # in its own session.
    for model, index_attr in ((Message, 'messages'), (Thread, 'threads')):
        with session_scope() as db_session:
            namespace = db_session.query(Namespace).filter(
                Namespace.public_id == namespace_public_id).one()

            search_engine = NamespaceSearchEngine(namespace_public_id)
            # TODO: paginate the query so that we don't run out of memory on
            # life-sized accounts.
            query = db_session.query(model).filter(
                model.namespace_id == namespace.id)
            if updated_since is not None:
                query = query.filter(model.updated_at > updated_since)

            for obj in query.all():
                document = encode(
                    obj, namespace_public_id=namespace_public_id,
                    format_address_fn=es_format_address_list,
                    format_tags_fn=es_format_tags_list)
                getattr(search_engine, index_attr).index(document)
                total_indexed += 1

    return total_indexed
Пример #16
0
def format_transactions_after_pointer(namespace,
                                      pointer,
                                      db_session,
                                      result_limit,
                                      exclude_types=None,
                                      include_types=None,
                                      exclude_folders=True,
                                      expand=False):
    """
    Return a pair (deltas, new_pointer), where deltas is a list of change
    events, represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "attributes": <API representation of the object for insert/update events>
      "cursor": <public_id of the transaction>
    }

    and new_pointer is the integer id of the last included transaction.
    When there is nothing to report, deltas is empty and new_pointer is the
    pointer reached so far.

    Arguments
    ---------
    namespace: object
        Namespace for which to get changes; its `id` and `public_id`
        attributes are used.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        database session
    result_limit: int
        Maximum number of results to return. (Because we may roll up multiple
        changes to the same object, fewer results can be returned.)
    exclude_types: list, optional
        If given, don't include transactions for these types of objects.
        'account' is always excluded.
    include_types: list, optional
        If given, only include transactions for these types of objects.
    exclude_folders: bool, optional
        If True (the default), 'folder' and 'label' transactions are
        excluded as well.
    expand: bool, optional
        Passed through to the Thread/Message loading options and to
        `encode`.

    """
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    exclude_types.add('account')
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    # End backwards-compatibility shim.

    # Short-circuit when the caller is already at the latest transaction.
    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        # deleted_at condition included to allow this query to be satisfied via
        # the legacy index on (namespace_id, deleted_at) for performance.
        # Also need to explicitly specify the index hint because the query
        # planner is dumb as nails and otherwise would make this super slow for
        # some values of namespace_id and pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        transactions = db_session.query(Transaction). \
            filter(
                Transaction.id > pointer,
                Transaction.namespace_id == namespace.id,
                Transaction.deleted_at.is_(None)). \
            with_hint(Transaction, 'USE INDEX (namespace_id_deleted_at)')

        # NOTE(review): exclude_types is always a set at this point, so this
        # condition is always true.
        if exclude_types is not None:
            transactions = transactions.filter(
                ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            transactions = transactions.filter(
                Transaction.object_type.in_(include_types))

        transactions = transactions. \
            order_by(asc(Transaction.id)).limit(result_limit).all()

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = {(trx.record_id, trx.command): trx
                           for trx in sorted(trxs, key=lambda t: t.id)
                           }.values()
            # Load all referenced not-deleted objects.
            ids_to_query = [
                trx.record_id for trx in latest_trxs if trx.command != 'delete'
            ]

            object_cls = transaction_objects()[obj_type]
            query = db_session.query(object_cls).filter(
                object_cls.id.in_(ids_to_query),
                object_cls.namespace_id == namespace.id)
            if object_cls == Thread:
                query = query.options(*Thread.api_loading_options(expand))
            elif object_cls == Message:
                query = query.options(*Message.api_loading_options(expand))
            objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object was not returned by the query above, so
                        # no delta is emitted for this transaction.
                        continue
                    repr_ = encode(obj,
                                   namespace_public_id=namespace.public_id,
                                   expand=expand)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions.
            results.sort()
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id
Пример #17
0
 def default(self, data):
     """Return the encoded form of `data`; raise TypeError when `encode`
     returns None for it."""
     result = encode(data)
     if result is None:
         raise TypeError
     return result
Пример #18
0
def format_transactions_after_pointer(namespace, pointer, db_session,
                                      result_limit, exclude_types=None,
                                      include_types=None, exclude_folders=True,
                                      exclude_metadata=True, exclude_account=True,
                                      expand=False):
    """
    Return a pair (deltas, new_pointer), where deltas is a list of change
    events, represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "attributes": <API representation of the object for insert/update events>
      "cursor": <public_id of the transaction>
    }

    and new_pointer is the integer id of the last included transaction.
    When there is nothing to report, deltas is empty and new_pointer is the
    pointer reached so far.

    Arguments
    ---------
    namespace: object
        Namespace for which to get changes; its `id` and `public_id`
        attributes are used.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        database session
    result_limit: int
        Maximum number of results to return. (Because we may roll up multiple
        changes to the same object, fewer results can be returned.)
    exclude_types: list, optional
        If given, don't include transactions for these types of objects.
    include_types: list, optional
        If given, only include transactions for these types of objects.
        'metadata' is dropped from it; the caller's collection is not
        modified.
    exclude_folders: bool, optional
        If True (the default), exclude 'folder' and 'label' transactions.
    exclude_metadata: bool, optional
        If True (the default), exclude 'metadata' transactions.
    exclude_account: bool, optional
        If True (the default), exclude 'account' transactions.
    expand: bool, optional
        Passed through to the Thread/Message loading options and to
        `encode`.

    """
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    if exclude_account is True:
        exclude_types.add('account')
    # End backwards-compatibility shim.

    # Metadata is excluded by default, and can only be included by setting the
    # exclude_metadata flag to False. If listed in include_types, remove it.
    if exclude_metadata is True:
        exclude_types.add('metadata')
    if include_types is not None and 'metadata' in include_types:
        # Filter into a new list instead of calling .remove(): the caller's
        # collection stays untouched, immutable sequences such as tuples are
        # accepted, and every occurrence is dropped.
        include_types = [type_ for type_ in include_types
                         if type_ != 'metadata']

    # Short-circuit when the caller is already at the latest transaction.
    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        # deleted_at condition included to allow this query to be satisfied via
        # the legacy index on (namespace_id, deleted_at) for performance.
        # Also need to explicitly specify the index hint because the query
        # planner is dumb as nails and otherwise would make this super slow for
        # some values of namespace_id and pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        transactions = db_session.query(Transaction). \
            filter(
                Transaction.id > pointer,
                Transaction.namespace_id == namespace.id,
                Transaction.deleted_at.is_(None)). \
            with_hint(Transaction, 'USE INDEX (namespace_id_deleted_at)')

        if exclude_types is not None:
            transactions = transactions.filter(
                ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            transactions = transactions.filter(
                Transaction.object_type.in_(include_types))

        transactions = transactions. \
            order_by(asc(Transaction.id)).limit(result_limit).all()

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = {(trx.record_id, trx.command): trx for trx in
                           sorted(trxs, key=lambda t: t.id)}.values()
            # Load all referenced not-deleted objects.
            ids_to_query = [trx.record_id for trx in latest_trxs
                            if trx.command != 'delete']

            object_cls = transaction_objects()[obj_type]

            if object_cls == Account:
                # The base query for Account queries the /Namespace/ table
                # since the API-returned "`account`" is a `namespace`
                # under-the-hood.
                query = db_session.query(Namespace).join(Account).filter(
                    Account.id.in_(ids_to_query),
                    Namespace.id == namespace.id)

                # Key by /namespace.account_id/ --
                # namespace.id may not be equal to account.id
                # and trx.record_id == account.id for `account` trxs.
                objects = {obj.account_id: obj for obj in query}
            else:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)

                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(*Message.api_loading_options(expand))

                objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object was not returned by the query above, so
                        # no delta is emitted for this transaction.
                        continue
                    repr_ = encode(
                        obj, namespace_public_id=namespace.public_id,
                        expand=expand)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions.
            results.sort()
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id
Пример #19
0
def format_transactions_after_pointer(namespace,
                                      pointer,
                                      db_session,
                                      result_limit,
                                      exclude_types=None,
                                      include_types=None,
                                      exclude_folders=True,
                                      exclude_metadata=True,
                                      exclude_account=True,
                                      expand=False,
                                      is_n1=False):
    """
    Return a pair (deltas, new_pointer), where deltas is a list of change
    events, represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <event name looked up in EVENT_NAME_FOR_COMMAND>,
      "id": <public id of the object the transaction refers to>,
      "attributes": <API representation of the object; omitted for deletes>,
      "cursor": <public_id of the transaction>
    }

    and new_pointer is the integer id of the last included transaction. If
    there is nothing to report, ([], pointer) is returned with the pointer
    that was reached.

    Neither `exclude_types` nor `include_types` is modified; the function
    works on private copies.

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        database session
    result_limit: int
        Maximum number of results to return. (Because we may roll up multiple
        changes to the same object, fewer results can be returned.)
    exclude_types: iterable of str, optional
        If given, don't include transactions for these types of objects.
    include_types: iterable of str, optional
        If given, only include transactions for these types of objects.
        A 'metadata' entry is always ignored here.
    exclude_folders: bool
        If True, 'folder' and 'label' transactions are excluded.
    exclude_metadata: bool
        If True, 'metadata' transactions are excluded.
    exclude_account: bool
        If True, 'account' transactions are excluded.
    expand: bool
        Forwarded to the Thread/Message loading options and to `encode`.
    is_n1: bool
        Forwarded to `encode`.

    """
    # Always a fresh set, so the caller's collection is never touched.
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    if exclude_account is True:
        exclude_types.add('account')
    # End backwards-compatibility shim.

    # Metadata is excluded by default, and can only be included by setting the
    # exclude_metadata flag to False. If listed in include_types, drop it.
    if exclude_metadata is True:
        exclude_types.add('metadata')
    if include_types is not None:
        # Build a filtered copy instead of calling .remove() on the argument:
        # that mutated the caller's list and failed outright for tuples.
        include_types = [type_ for type_ in include_types
                         if type_ != 'metadata']

    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        transactions = db_session.query(Transaction). \
            filter(
                Transaction.id > pointer,
                Transaction.namespace_id == namespace.id)

        # exclude_types is always a set at this point, so the filter is
        # applied unconditionally.
        transactions = transactions.filter(
            ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            transactions = transactions.filter(
                Transaction.object_type.in_(include_types))

        transactions = transactions. \
            order_by(asc(Transaction.id)).limit(result_limit).all()

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = {(trx.record_id, trx.command): trx
                           for trx in sorted(trxs, key=lambda t: t.id)
                           }.values()
            # Load all referenced not-deleted objects.
            ids_to_query = [
                trx.record_id for trx in latest_trxs if trx.command != 'delete'
            ]

            object_cls = transaction_objects()[obj_type]

            if object_cls == Account:
                # The base query for Account queries the /Namespace/ table
                # since the API-returned "`account`" is a `namespace`
                # under-the-hood.
                query = db_session.query(Namespace).join(Account).filter(
                    Account.id.in_(ids_to_query), Namespace.id == namespace.id)

                # Key by /namespace.account_id/ --
                # namespace.id may not be equal to account.id
                # and trx.record_id == account.id for `account` trxs.
                objects = {obj.account_id: obj for obj in query}
            else:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)

                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(*Message.api_loading_options(expand))

                objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The referenced object was not found by the query
                        # above; skip this transaction.
                        continue
                    repr_ = encode(obj,
                                   namespace_public_id=namespace.public_id,
                                   expand=expand,
                                   is_n1=is_n1)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions. Sorting on
            # the id alone avoids ever comparing the delta dicts themselves.
            results.sort(key=lambda pair: pair[0])
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id
Пример #20
0
def format_transactions_after_pointer(namespace, pointer, db_session,
                                      result_limit, exclude_types=None,
                                      include_types=None):
    """
    Walk the transaction log of a namespace and build API change deltas.

    Returns a pair (deltas, new_pointer). Each delta is a dictionary:
    {
      "object": <API object type, e.g. "thread">,
      "event": <event name looked up in EVENT_NAME_FOR_COMMAND>,
      "id": <public id of the object the transaction refers to>,
      "attributes": <API representation of the object; omitted for deletes>,
      "cursor": <public_id of the transaction>
    }

    new_pointer is the integer id of the last included transaction. When no
    further transactions match, ([], pointer) is returned with the pointer
    that was reached.

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: InboxSession
        database session
    result_limit: int
        Maximum number of transactions fetched per pass. (Because multiple
        changes to the same object are rolled up, fewer results can be
        returned.)
    exclude_types: list, optional
        If given, don't include transactions for these types of objects.
    include_types: list, optional
        If given, only include transactions for these types of objects.

    """
    while True:
        # deleted_at condition included to allow this query to be satisfied via
        # the legacy index on (namespace_id, deleted_at) for performance.
        # The index hint is spelled out explicitly because the query planner
        # otherwise makes this very slow for some values of namespace_id and
        # pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        trx_query = db_session.query(Transaction).filter(
            Transaction.id > pointer,
            Transaction.namespace_id == namespace.id,
            Transaction.deleted_at.is_(None))
        trx_query = trx_query.with_hint(
            Transaction, 'USE INDEX (namespace_id_deleted_at)')

        if exclude_types is not None:
            trx_query = trx_query.filter(
                ~Transaction.object_type.in_(exclude_types))
        if include_types is not None:
            trx_query = trx_query.filter(
                Transaction.object_type.in_(include_types))

        trx_query = trx_query.order_by(asc(Transaction.id))
        batch = trx_query.limit(result_limit).all()
        if not batch:
            return ([], pointer)

        # Bucket the fetched transactions by the type of object they touch.
        grouped = collections.defaultdict(list)
        for entry in batch:
            grouped[entry.object_type].append(entry)

        results = []
        for obj_type, group in grouped.items():
            # Roll up per (record_id, command): walking in ascending id order
            # means a later transaction overwrites an earlier one, so only
            # the most recent survives for each pair.
            newest = {}
            for entry in sorted(group, key=lambda t: t.id):
                newest[(entry.record_id, entry.command)] = entry
            latest = newest.values()

            # Fetch every object referenced by a non-delete transaction.
            live_ids = [entry.record_id for entry in latest
                        if entry.command != 'delete']
            model = transaction_objects()[obj_type]
            lookup = db_session.query(model).filter(
                model.id.in_(live_ids),
                model.namespace_id == namespace.id)
            if model in QUERY_OPTIONS:
                lookup = lookup.options(*QUERY_OPTIONS[model])
            by_id = dict((row.id, row) for row in lookup)

            for entry in latest:
                delta = {
                    'object': entry.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[entry.command],
                    'id': entry.object_public_id,
                    'cursor': entry.public_id
                }
                if entry.command != 'delete':
                    target = by_id.get(entry.record_id)
                    if target is None:
                        # The object was not returned by the lookup above;
                        # drop this transaction from the output.
                        continue
                    delta['attributes'] = encode(
                        target, namespace_public_id=namespace.public_id)

                results.append((entry.id, delta))

        if not results:
            # None of the referenced objects could be loaded, so nothing was
            # produced. Advance past this batch and keep traversing the log
            # until we get actual results or reach the end.
            pointer = batch[-1].id
            continue

        # Order deltas by the id of their underlying transactions.
        results.sort()
        return ([delta for _, delta in results], results[-1][0])