Exemplo n.º 1
0
    def queue_transmission(self, doc, subscribers):
        """Format the article and queue it for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But the formatted item cannot be generated
        once per format type and shared, because every formatted item must carry a published sequence number
        generated per Subscriber.

        A failure while handling one subscriber is logged and does not prevent the remaining subscribers from
        being processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in subscriber['destinations']:
                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    # every entry is a (published sequence number, formatted item) pair
                    for pub_seq_num, formatted_doc in formatter.format(doc, subscriber):
                        publish_queue_item = {
                            'item_id': doc['_id'],
                            'item_version': doc[config.VERSION],
                            'formatted_item': formatted_doc,
                            'subscriber_id': subscriber['_id'],
                            'destination': destination,
                            'published_seq_num': pub_seq_num,
                            'publish_schedule': doc.get('publish_schedule'),
                            'unique_name': doc.get('unique_name'),
                            'content_type': doc.get('type'),
                            'headline': doc.get('headline'),
                        }

                        # set_state may store a state on the queue item; when it does, that state
                        # becomes the publishing action instead of this service's published_state
                        self.set_state(doc, publish_queue_item)
                        if publish_queue_item.get(ITEM_STATE):
                            publish_queue_item['publishing_action'] = publish_queue_item.get(ITEM_STATE)
                            del publish_queue_item[ITEM_STATE]
                        else:
                            publish_queue_item['publishing_action'] = self.published_state

                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except Exception:
                # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued
def format_document():
    """Format one archive document for a subscriber and return the result as an HTTP response.

    The ``document_id``, ``subscriber_id`` and ``formatter`` values are read from the request query string.
    The second element of the first entry returned by the formatter becomes the response body, served with
    the mime type of the requested formatter, CORS headers built from the app config and caching disabled.
    """
    query = request.args
    doc_id = query.get("document_id")
    sub_id = query.get("subscriber_id")
    qcode = query.get("formatter")

    subscribers_service = get_resource_service("subscribers")
    subscriber = subscribers_service.find_one(req=None, _id=sub_id)
    archive_service = get_resource_service("archive")
    article = archive_service.find_one(req=None, _id=doc_id)

    rendered = get_formatter(qcode, article).format(
        article=apply_schema(article),
        subscriber=subscriber,
        codes=None,
    )

    response_headers = dict(
        [
            ("Access-Control-Allow-Origin", app.config["CLIENT_URL"]),
            ("Access-Control-Allow-Methods", "GET"),
            ("Access-Control-Allow-Headers", ",".join(app.config["X_HEADERS"])),
            ("Access-Control-Allow-Credentials", "true"),
            ("Cache-Control", "no-cache, no-store, must-revalidate"),
        ]
    )

    body = rendered[0][1]
    return Response(body, headers=response_headers, mimetype=get_mime_type(qcode))
def format_document():
    """Format one archive document for a subscriber and return the result as an HTTP response.

    The ``document_id``, ``subscriber_id`` and ``formatter`` values are read from the request query string.
    The second element of the first entry returned by the formatter becomes the response body, served with
    the mime type of the requested formatter, a wildcard CORS origin and caching disabled.
    """
    query = request.args
    doc_id = query.get('document_id')
    sub_id = query.get('subscriber_id')
    qcode = query.get('formatter')

    subscribers_service = get_resource_service('subscribers')
    subscriber = subscribers_service.find_one(req=None, _id=sub_id)
    archive_service = get_resource_service('archive')
    article = archive_service.find_one(req=None, _id=doc_id)

    rendered = get_formatter(qcode, article).format(
        article=apply_schema(article), subscriber=subscriber, codes=None)

    response_headers = dict([
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Methods', 'GET'),
        ('Cache-Control', 'no-cache, no-store, must-revalidate'),
    ])

    body = rendered[0][1]
    return Response(body, headers=response_headers, mimetype=get_mime_type(qcode))
def format_document():
    """Return a preview of an archive document rendered by the requested formatter.

    Query string arguments ``document_id``, ``subscriber_id`` and ``formatter`` select the archive item, the
    subscriber and the formatter. The response body is the second element of the first entry produced by the
    formatter; it is sent with the formatter's mime type, a wildcard CORS origin and caching disabled.
    """
    params = request.args
    wanted_document = params.get('document_id')
    wanted_subscriber = params.get('subscriber_id')
    wanted_formatter = params.get('formatter')

    subscriber = get_resource_service('subscribers').find_one(req=None, _id=wanted_subscriber)
    item = get_resource_service('archive').find_one(req=None, _id=wanted_document)

    formatter = get_formatter(wanted_formatter, item)
    format_kwargs = {'article': apply_schema(item), 'subscriber': subscriber, 'codes': None}
    output = formatter.format(**format_kwargs)

    cors_and_cache = {}
    cors_and_cache['Access-Control-Allow-Origin'] = '*'
    cors_and_cache['Access-Control-Allow-Methods'] = 'GET'
    cors_and_cache['Cache-Control'] = 'no-cache, no-store, must-revalidate'

    return Response(output[0][1], headers=cors_and_cache, mimetype=get_mime_type(wanted_formatter))
Exemplo n.º 5
0
    def queue_transmission(self, doc, subscribers, subscriber_codes=None):
        """Format the article and queue it for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But the formatted item cannot be generated
        once per format type and shared, because every formatted item must carry a published sequence number
        generated per Subscriber.

        A failure while handling one subscriber is logged and does not prevent the remaining subscribers from
        being processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: optional mapping of subscriber id to the codes passed to the formatter
            and stored on the queue item; treated as empty when omitted
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        if subscriber_codes is None:
            # avoid a shared mutable default argument
            subscriber_codes = {}

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in subscriber['destinations']:
                    embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                        PACKAGE_TYPE not in doc and destination['config'].get('packaged', False)
                    if embed_package_items:
                        # NOTE(review): doc is rebound here, so later destinations and subscribers handled by
                        # this call also receive the embedded version - confirm this is intended.
                        doc = self._embed_package_items(doc)

                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    codes = subscriber_codes.get(subscriber[config.ID_FIELD])
                    formatted_docs = formatter.format(doc, subscriber, codes)

                    # formatters may return (seq_num, formatted_item) pairs or ready-made dicts;
                    # normalise the pairs in place so every entry is a dict
                    for idx, publish_data in enumerate(formatted_docs):
                        if not isinstance(publish_data, dict):
                            pub_seq_num, formatted_doc = publish_data
                            formatted_docs[idx] = {
                                'published_seq_num': pub_seq_num,
                                'formatted_item': formatted_doc,
                            }
                        else:
                            assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data, \
                                "missing keys in publish_data"

                    for publish_queue_item in formatted_docs:
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                        publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                        publish_queue_item['destination'] = destination
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        publish_queue_item['ingest_provider'] = \
                            ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                        # the encoded payload is put into app storage; only the id returned by
                        # the storage is kept on the queue item
                        try:
                            encoded_item = publish_queue_item.pop('encoded_item')
                        except KeyError:
                            pass
                        else:
                            binary = io.BytesIO(encoded_item)
                            publish_queue_item['encoded_item_id'] = app.storage.put(binary)

                        publish_queue_item.pop(ITEM_STATE, None)
                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except Exception:
                # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
                logger.exception(
                    "Failed to queue item for id {} with headline {} for subscriber {}."
                    .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued
Exemplo n.º 6
0
    def queue_transmission(self, doc, subscribers, subscriber_codes=None, associations=None, sent=False):
        """Format the article and put one publish queue entry per formatted item for every subscriber.

        ::Important Note:: Format Type across Subscribers can repeat. But the formatted item cannot be generated
        once per format type and shared, because every formatted item must carry a published sequence number
        generated per Subscriber.

        When ``config.PUBLISH_ASSOCIATIONS_RESEND`` is set and ``sent`` is false, associated items may be resent
        first, depending on the configured mode and on whether the item is new, an update or a correction.
        A failure while handling one subscriber is logged and the loop continues with the next one.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: optional mapping of subscriber id to codes handed to the formatter
        :param dict associations: optional mapping of subscriber id to the associated items stored on the entry
        :param bool sent: when true the associated items are not resent
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        associations = {} if associations is None else associations
        subscriber_codes = {} if subscriber_codes is None else subscriber_codes

        if config.PUBLISH_ASSOCIATIONS_RESEND and not sent:
            is_correction = doc.get("state") in ("corrected", "being_corrected")
            is_update = doc.get("rewrite_of")
            is_new = not is_correction and not is_update

            resend_mode = config.PUBLISH_ASSOCIATIONS_RESEND
            should_resend = (
                (resend_mode == "new" and is_new)
                or resend_mode == "corrections"
                or (resend_mode == "updates" and not is_correction)
            )
            if should_resend:
                self.resend_association_items(doc)

        was_queued = False
        missing_formats = []
        for subscriber in subscribers:
            try:
                is_textual = doc[ITEM_TYPE] in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED)
                if not is_textual and subscriber.get("subscriber_type", "") == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in self.get_destinations(subscriber):
                    wants_package = (destination.get("config") or {}).get("packaged", False)

                    if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and wants_package:
                        doc = self._embed_package_items(doc)

                    # skip packaged destinations for items already published within a package
                    if doc.get(PUBLISHED_IN_PACKAGE) and wants_package:
                        continue

                    # Step 2(a)
                    format_name = destination["format"]
                    formatter = get_formatter(format_name, doc)
                    if not formatter:
                        # remember the format that has no formatter
                        missing_formats.append(format_name)
                        continue

                    formatter.set_destination(destination, subscriber)
                    formatted_docs = formatter.format(
                        self.filter_document(doc),
                        subscriber,
                        subscriber_codes.get(subscriber[config.ID_FIELD]),
                    )

                    # turn (sequence number, formatted item) pairs into dicts, in place
                    for position, entry in enumerate(formatted_docs):
                        if isinstance(entry, dict):
                            assert (
                                "published_seq_num" in entry and "formatted_item" in entry
                            ), "missing keys in publish_data"
                            continue
                        seq_num, payload = entry
                        formatted_docs[position] = {
                            "published_seq_num": seq_num,
                            "formatted_item": payload,
                        }

                    for queue_item in formatted_docs:
                        queue_item["item_id"] = doc["item_id"]
                        queue_item["item_version"] = doc[config.VERSION]
                        queue_item["subscriber_id"] = subscriber[config.ID_FIELD]
                        queue_item["codes"] = subscriber_codes.get(subscriber[config.ID_FIELD])
                        queue_item["destination"] = destination
                        # publish_schedule only indicates that the queue item came from a scheduled item
                        queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        queue_item["unique_name"] = doc.get("unique_name")
                        queue_item["content_type"] = doc.get("type")
                        queue_item["headline"] = doc.get("headline")
                        queue_item["publishing_action"] = self.published_state

                        provider = doc.get("ingest_provider")
                        queue_item["ingest_provider"] = ObjectId(provider) if provider else None
                        queue_item["associated_items"] = associations.get(subscriber[config.ID_FIELD], [])
                        queue_item["priority"] = subscriber.get("priority")

                        in_package = doc.get(PUBLISHED_IN_PACKAGE)
                        if in_package:
                            queue_item[PUBLISHED_IN_PACKAGE] = in_package

                        # the encoded payload goes to app storage, the entry keeps only the returned id
                        if "encoded_item" in queue_item:
                            stream = io.BytesIO(queue_item.pop("encoded_item"))
                            queue_item["encoded_item_id"] = app.storage.put(stream)

                        queue_item.pop(ITEM_STATE, None)

                        # content api delivery will be marked as SUCCESS in queue
                        get_resource_service("publish_queue").post([queue_item])
                        was_queued = True

            except Exception:
                logger.exception(
                    "Failed to queue item for id {} with headline {} for subscriber {}."
                    .format(doc.get(config.ID_FIELD), doc.get("headline"), subscriber.get("name")))

        return missing_formats, was_queued
Exemplo n.º 7
0
    def queue_transmission(self, doc, subscribers, subscriber_codes=None, associations=None):
        """Format the article and queue it for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But the formatted item cannot be generated
        once per format type and shared, because every formatted item must carry a published sequence number
        generated per Subscriber.

        A failure while handling one subscriber is logged and does not prevent the remaining subscribers from
        being processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: optional mapping of subscriber id to the codes passed to the formatter
            and stored on the queue item; treated as empty when omitted
        :param dict associations: optional mapping of subscriber id to the associated items stored on the
            queue item; treated as empty when omitted
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        # avoid shared mutable default arguments
        if subscriber_codes is None:
            subscriber_codes = {}
        if associations is None:
            associations = {}

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in self.get_destinations(subscriber):
                    embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                        (destination.get('config') or {}).get('packaged', False)
                    if embed_package_items:
                        # NOTE(review): doc is rebound here, so later destinations and subscribers handled by
                        # this call also receive the embedded version - confirm this is intended.
                        doc = self._embed_package_items(doc)

                    # skip packaged destinations for items already published within a package
                    if doc.get(PUBLISHED_IN_PACKAGE) and \
                            (destination.get('config') or {}).get('packaged', False):
                        continue

                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    formatted_docs = formatter.format(apply_schema(doc),
                                                      subscriber,
                                                      subscriber_codes.get(subscriber[config.ID_FIELD]))

                    # formatters may return (seq_num, formatted_item) pairs or ready-made dicts;
                    # normalise the pairs in place so every entry is a dict
                    for idx, publish_data in enumerate(formatted_docs):
                        if not isinstance(publish_data, dict):
                            pub_seq_num, formatted_doc = publish_data
                            formatted_docs[idx] = {'published_seq_num': pub_seq_num,
                                                   'formatted_item': formatted_doc}
                        else:
                            assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data, \
                                "missing keys in publish_data"

                    for publish_queue_item in formatted_docs:
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                        publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                        publish_queue_item['destination'] = destination
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        publish_queue_item['ingest_provider'] = \
                            ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                        publish_queue_item['associated_items'] = associations.get(subscriber[config.ID_FIELD], [])

                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                        # the encoded payload is put into app storage; only the id returned by
                        # the storage is kept on the queue item
                        try:
                            encoded_item = publish_queue_item.pop('encoded_item')
                        except KeyError:
                            pass
                        else:
                            binary = io.BytesIO(encoded_item)
                            publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                        publish_queue_item.pop(ITEM_STATE, None)

                        # content api delivery will be marked as SUCCESS in queue
                        get_resource_service('publish_queue').post([publish_queue_item])

                        queued = True
            except Exception:
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued