Пример #1
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Populate the basic fields on each document before it is created.

    Every entry of ``docs`` is updated in place and nothing is returned.
    ``repo_type`` is forwarded to the unique-id and user-preference helpers.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # Keep a truthy guid, otherwise generate a NewsML one.
        item[GUID_FIELD] = item.get(GUID_FIELD) or generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)

        # The family id falls back to the item's own guid.
        item.setdefault('family_id', item[GUID_FIELD])

        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)

        # The id field falls back to the guid when the caller did not set it.
        id_field = config.ID_FIELD
        if id_field not in item:
            item[id_field] = item[GUID_FIELD]

        copy_metadata_from_user_preferences(item, repo_type)

        # A missing or empty operation is recorded as a create.
        item[ITEM_OPERATION] = item.get(ITEM_OPERATION) or ITEM_CREATE
Пример #2
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Give every new document its baseline identifiers and metadata.

    Works in place on each entry of ``docs`` and returns nothing.
    ``repo_type`` is handed on to the unique-id, dateline, byline and
    sign-off helpers.
    """
    for entry in docs:
        update_dates_for(entry)
        set_original_creator(entry)

        # A missing or empty guid is replaced by a generated NewsML guid.
        current_guid = entry.get(GUID_FIELD)
        if not current_guid:
            entry[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if "unique_id" not in entry:
            generate_unique_id_and_name(entry, repo_type)

        entry.setdefault("family_id", entry[GUID_FIELD])

        if "event_id" not in entry:
            entry["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(entry, CONTENT_STATE.DRAFT)

        # The id field defaults to the guid.
        id_field = config.ID_FIELD
        if id_field not in entry:
            entry[id_field] = entry[GUID_FIELD]

        set_dateline(entry, repo_type)
        set_byline(entry, repo_type)
        set_sign_off(entry, repo_type=repo_type)

        entry[ITEM_OPERATION] = entry.get(ITEM_OPERATION) or ITEM_CREATE
Пример #3
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the basic fields of each document that is about to be created.

    Documents are modified in place; the function returns nothing.
    ``repo_type`` selects archive-only behaviour (default source) and is
    forwarded to the unique-id and user-preference helpers.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        item[GUID_FIELD] = item.get(GUID_FIELD) or generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)

        item.setdefault('family_id', item[GUID_FIELD])

        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)

        id_field = config.ID_FIELD
        if id_field not in item:
            item[id_field] = item[GUID_FIELD]

        if repo_type == ARCHIVE:
            # Only archive items without an ingest provider get the default source.
            if not item.get('ingest_provider'):
                set_default_source(item)

        copy_metadata_from_user_preferences(item, repo_type)

        # Apply the configured default content type when the item has no profile.
        if 'profile' not in item:
            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if default_profile:
                item['profile'] = default_profile

        item[ITEM_OPERATION] = item.get(ITEM_OPERATION) or ITEM_CREATE
Пример #4
0
 def create(self, docs):
     """Set the ingested default state, populate basic fields, then create.

     Returns whatever the parent class ``create`` returns for ``docs``.
     """
     for entry in docs:
         set_default_state(entry, STATE_INGESTED)

     # Must run after the state is set, otherwise the items would become drafts.
     on_create_item(docs)

     created = super().create(docs)
     return created
Пример #5
0
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare a fetched item and store it in the ``ingest`` resource.

    ``item`` is updated in place: it receives an id, family id, provider
    reference, source, default state and expiry; categories and subjects are
    processed; the rule set is applied; and renditions are updated from the
    base image.  If an item with the same guid is already stored, its id is
    reused and the stored item is replaced; otherwise the item is inserted.
    When ``routing_scheme`` is truthy the stored item is then handed to the
    routing schemes service.

    :param item: mutable mapping describing the item to ingest
    :param provider: provider mapping; ``type``, ``source``,
        ``content_expiry`` and the id field are read from it
    :param rule_set: forwarded to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after saving
    :return: ``True`` on success, ``False`` if any exception was raised
        (the exception is logged and reported to sentry on a best-effort basis)
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href, old_item)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # The placeholder is required: passing ``e`` without one makes
                # the logging module fail to format the record.
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # Sentry reporting is best effort and must not mask the original
            # failure; KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        return False
    return True
Пример #6
0
    def on_create(self, docs):
        """Prepare ingest documents before they are created.

        Each document gets the ingested default state and is passed through
        ``handle_existing_data``; the shared basic fields are then populated
        for the ``ingest`` repo.
        """
        for entry in docs:
            set_default_state(entry, STATE_INGESTED)
            handle_existing_data(entry, doc_type='ingest')

        # Must run after the state is set, otherwise the items would become drafts.
        on_create_item(docs, repo_type='ingest')
Пример #7
0
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare a fetched item and store it in the ``ingest`` resource.

    ``item`` is updated in place: it receives an id, family id, provider
    reference, source, default state and expiry; category and subject codes
    are processed; the rule set is applied; and renditions are updated from
    the base image.  If an item with the same guid is already stored, its id
    is reused and the stored item is replaced; otherwise the item is
    inserted.  When ``routing_scheme`` is truthy the stored item is then
    handed to the routing schemes service.

    :param item: mutable mapping describing the item to ingest
    :param provider: provider mapping; ``type``, ``source``,
        ``content_expiry`` and the id field are read from it
    :param rule_set: forwarded to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after saving
    :return: ``True`` on success, ``False`` if any exception was raised
        (the exception is logged and reported to sentry on a best-effort basis)
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get("type")].provider = provider

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        set_default_state(item, STATE_INGESTED)
        item["expiry"] = get_expiry_date(
            provider.get("content_expiry", INGEST_EXPIRY_MINUTES), item.get("versioncreated")
        )

        if "anpa-category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            process_iptc_codes(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service("ingest")

        if item.get("ingest_provider_sequence") is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get("renditions", {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get("type")].prepare_href(baseImageRend["href"])
                update_renditions(item, href)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # The placeholder is required: passing ``e`` without one makes
                # the logging module fail to format the record.
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service("routing_schemes").apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # Sentry reporting is best effort and must not mask the original
            # failure; KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        return False
    return True
Пример #8
0
    def on_create(self, docs):
        """Prepare ingest documents before they are created.

        Each document gets the ingested default state, default priority and
        urgency (only when no ``DEFAULT_CONTENT_TYPE`` is configured), is
        passed through ``handle_existing_data`` and has its word count
        updated.  The shared basic fields are then populated for the
        ``ingest`` repo.
        """
        for entry in docs:
            set_default_state(entry, CONTENT_STATE.INGESTED)

            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if not default_profile:
                priority = int(config.DEFAULT_PRIORITY_VALUE_FOR_INGESTED_ARTICLES)
                entry.setdefault(ITEM_PRIORITY, priority)
                urgency = int(config.DEFAULT_URGENCY_VALUE_FOR_INGESTED_ARTICLES)
                entry.setdefault(ITEM_URGENCY, urgency)

            handle_existing_data(entry, doc_type='ingest')
            update_word_count(entry)

        # Must run after the state is set, otherwise the items would become drafts.
        on_create_item(docs, repo_type='ingest')
Пример #9
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Populate the basic fields on each document before it is created.

    Every entry of ``docs`` is updated in place and nothing is returned.
    ``repo_type`` controls the archive-only default source, suppresses
    ``event_id`` generation for ``"ingest"`` and is forwarded to the
    unique-id and user-preference helpers.
    """
    for item in docs:
        editor_utils.generate_fields(item)
        update_dates_for(item)
        set_original_creator(item)

        # Keep a truthy guid, otherwise generate a NewsML one.
        item[GUID_FIELD] = item.get(GUID_FIELD) or generate_guid(type=GUID_NEWSML)

        if "unique_id" not in item:
            generate_unique_id_and_name(item, repo_type)

        item.setdefault("family_id", item[GUID_FIELD])

        # Ingest items do not get an event id here.
        if repo_type != "ingest" and "event_id" not in item:
            item["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)

        id_field = config.ID_FIELD
        if id_field not in item:
            item[id_field] = item[GUID_FIELD]

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(item)

        # Apply the configured default content type when the item has no profile.
        if "profile" not in item:
            default_profile = app.config.get("DEFAULT_CONTENT_TYPE", None)
            if default_profile:
                item["profile"] = default_profile

        copy_metadata_from_profile(item)
        copy_metadata_from_user_preferences(item, repo_type)

        if "language" not in item:
            item["language"] = app.config.get("DEFAULT_LANGUAGE", "en")

            # A desk language, when the item's desk defines one, wins over the default.
            task = item.get("task", None)
            desk_id = task.get("desk", None) if task else None
            if desk_id:
                desks_service = superdesk.get_resource_service("desks")
                desk = desks_service.find_one(req=None, _id=desk_id)
                desk_language = desk.get("desk_language", None) if desk else None
                if desk_language:
                    item["language"] = desk_language

        item[ITEM_OPERATION] = item.get(ITEM_OPERATION) or ITEM_CREATE

        template_id = item.get("template")
        if template_id:
            from apps.templates.content_templates import render_content_template_by_id  # avoid circular import

            # Drop the stored fields meta, render the template, then regenerate editor fields.
            item.pop("fields_meta", None)
            render_content_template_by_id(item, template_id, update=True)
            editor_utils.generate_fields(item)
Пример #10
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Give every new document its baseline identifiers and metadata.

    Works in place on each entry of ``docs`` and returns nothing.
    ``repo_type`` controls the archive-only default source, suppresses
    ``event_id`` generation for ``'ingest'`` and is forwarded to the
    unique-id and user-preference helpers.
    """
    for entry in docs:
        editor_utils.generate_fields(entry)
        update_dates_for(entry)
        set_original_creator(entry)

        # A missing or empty guid is replaced by a generated NewsML guid.
        if not entry.get(GUID_FIELD):
            entry[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in entry:
            generate_unique_id_and_name(entry, repo_type)

        entry.setdefault('family_id', entry[GUID_FIELD])

        # Ingest items do not get an event id here.
        if repo_type != 'ingest' and 'event_id' not in entry:
            entry['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(entry, CONTENT_STATE.DRAFT)

        id_field = config.ID_FIELD
        if id_field not in entry:
            entry[id_field] = entry[GUID_FIELD]

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(entry)

        # Apply the configured default content type when there is no profile yet.
        if 'profile' not in entry:
            configured_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if configured_profile:
                entry['profile'] = configured_profile

        copy_metadata_from_profile(entry)
        copy_metadata_from_user_preferences(entry, repo_type)

        if 'language' not in entry:
            entry['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

            # A desk language, when the item's desk defines one, wins over the default.
            task = entry.get('task', None)
            desk_id = task.get('desk', None) if task else None
            if desk_id:
                desk = superdesk.get_resource_service('desks').find_one(req=None, _id=desk_id)
                if desk:
                    desk_language = desk.get('desk_language', None)
                    if desk_language:
                        entry['language'] = desk_language

        if not entry.get(ITEM_OPERATION):
            entry[ITEM_OPERATION] = ITEM_CREATE

        template_id = entry.get('template')
        if template_id:
            from apps.templates.content_templates import render_content_template_by_id  # avoid circular import
            # Drop the stored fields meta, render the template, then regenerate editor fields.
            entry.pop('fields_meta', None)
            render_content_template_by_id(entry, template_id, update=True)
            editor_utils.generate_fields(entry)
Пример #11
0
def on_create_item(docs):
    """Populate dates, creator, guid, unique id, state and ``_id`` on each doc.

    Every entry of ``docs`` is updated in place and nothing is returned.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # Keep a truthy guid, otherwise generate a NewsML one.
        item['guid'] = item.get('guid') or generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in item:
            generate_unique_id_and_name(item)

        set_default_state(item, 'draft')

        # ``_id`` falls back to the guid when the caller did not provide one.
        if '_id' not in item:
            item['_id'] = item['guid']
Пример #12
0
def on_create_item(docs):
    """Give each new document its dates, creator, guid, unique id, state and id.

    Works in place on every entry of ``docs``; returns nothing.
    """
    for entry in docs:
        update_dates_for(entry)
        set_original_creator(entry)

        # A missing or empty guid is replaced by a generated NewsML guid.
        existing_guid = entry.get('guid')
        if not existing_guid:
            entry['guid'] = generate_guid(type=GUID_NEWSML)

        has_unique_id = 'unique_id' in entry
        if not has_unique_id:
            generate_unique_id_and_name(entry)

        set_default_state(entry, 'draft')

        # The guid doubles as ``_id`` unless one is already present.
        if '_id' not in entry:
            entry['_id'] = entry['guid']
Пример #13
0
    def on_create(self, docs):
        """Prepare ingest documents before they are created.

        Each document gets the ingested default state plus default priority
        and urgency, is passed through ``handle_existing_data`` and has its
        word count updated.  The shared basic fields are then populated for
        the ``ingest`` repo.
        """
        for entry in docs:
            set_default_state(entry, CONTENT_STATE.INGESTED)

            priority = int(config.DEFAULT_PRIORITY_VALUE_FOR_INGESTED_ARTICLES)
            entry.setdefault(ITEM_PRIORITY, priority)

            urgency = int(config.DEFAULT_URGENCY_VALUE_FOR_INGESTED_ARTICLES)
            entry.setdefault(ITEM_URGENCY, urgency)

            handle_existing_data(entry, doc_type='ingest')
            update_word_count(entry)

        # Must run after the state is set, otherwise the items would become drafts.
        on_create_item(docs, repo_type='ingest')
Пример #14
0
def ingest_item(item, provider, rule_set=None):
    """Prepare a fetched item and store it through the ``ingest`` service.

    ``item`` is updated in place: ``_id`` defaults to its guid, and it
    receives the provider reference, source and default state; the category
    is processed; the rule set is applied; and renditions are updated from
    the base image.  An item whose guid is already stored is replaced with
    ``put``; otherwise it is created with ``post``, falling back to ``put``
    if the post raises ``HTTPException``.

    :param item: mutable mapping describing the item; must contain ``guid``
    :param provider: provider mapping; ``type``, ``source`` and ``_id`` are read
    :param rule_set: forwarded to ``apply_rule_set``
    :raises ProviderError: re-raised as is, or built via
        ``ProviderError.ingestError`` from any other exception
    """
    try:
        item.setdefault('_id', item['guid'])
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider['_id'])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)

        if 'anpa-category' in item:
            process_anpa_category(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(
                    baseImageRend['href'])
                update_renditions(item, href)

        old_item = ingest_service.find_one(_id=item['guid'], req=None)

        if old_item:
            ingest_service.put(item['guid'], item)
        else:
            try:
                ingest_service.post([item])
            except HTTPException as e:
                # The placeholder is required: passing ``e`` without one makes
                # the logging module fail to format the record.
                logger.error(
                    "Exception while persisting item in ingest collection: %s", e)
                # Fall back to replacing the document under the same guid.
                ingest_service.put(item['guid'], item)
    except ProviderError:
        raise
    except Exception as ex:
        raise ProviderError.ingestError(ex, provider)
Пример #15
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the basic fields of each document that is about to be created.

    Documents are modified in place; the function returns nothing.
    ``repo_type`` controls the archive-only default source, suppresses
    ``event_id`` generation for ``'ingest'`` and is forwarded to the
    unique-id and user-preference helpers.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # Keep a truthy guid, otherwise generate a NewsML one.
        item[GUID_FIELD] = item.get(GUID_FIELD) or generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)

        item.setdefault('family_id', item[GUID_FIELD])

        # Ingest items do not get an event id here.
        if repo_type != 'ingest' and 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)

        id_field = config.ID_FIELD
        if id_field not in item:
            item[id_field] = item[GUID_FIELD]

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(item)

        # Apply the configured default content type when the item has no profile.
        if 'profile' not in item:
            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if default_profile:
                item['profile'] = default_profile

        copy_metadata_from_profile(item)
        copy_metadata_from_user_preferences(item, repo_type)

        if 'language' not in item:
            item['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

            # A desk language, when the item's desk defines one, wins over the default.
            task = item.get('task', None)
            desk_id = task.get('desk', None) if task else None
            if desk_id:
                desks_service = superdesk.get_resource_service('desks')
                desk = desks_service.find_one(req=None, _id=desk_id)
                desk_language = desk.get('desk_language', None) if desk else None
                if desk_language:
                    item['language'] = desk_language

        item[ITEM_OPERATION] = item.get(ITEM_OPERATION) or ITEM_CREATE
Пример #16
0
def ingest_item(item, provider, rule_set=None):
    """Prepare a fetched item and store it through the ``ingest`` service.

    ``item`` is updated in place: ``_id`` defaults to its guid, and it
    receives the provider reference, source and default state; the category
    is processed; the rule set is applied; and renditions are updated from
    the base image.  An item whose guid is already stored is replaced with
    ``put``; otherwise it is created with ``post``, falling back to ``put``
    if the post raises ``HTTPException``.

    :param item: mutable mapping describing the item; must contain ``guid``
    :param provider: provider mapping; ``type``, ``source`` and ``_id`` are read
    :param rule_set: forwarded to ``apply_rule_set``
    :raises ProviderError: re-raised as is, or built via
        ``ProviderError.ingestError`` from any other exception
    """
    try:
        item.setdefault('_id', item['guid'])
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider['_id'])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)

        if 'anpa-category' in item:
            process_anpa_category(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href)

        old_item = ingest_service.find_one(_id=item['guid'], req=None)

        if old_item:
            ingest_service.put(item['guid'], item)
        else:
            try:
                ingest_service.post([item])
            except HTTPException as e:
                # The placeholder is required: passing ``e`` without one makes
                # the logging module fail to format the record.
                logger.error("Exception while persisting item in ingest collection: %s", e)
                # Fall back to replacing the document under the same guid.
                ingest_service.put(item['guid'], item)
    except ProviderError:
        raise
    except Exception as ex:
        raise ProviderError.ingestError(ex, provider)
Пример #17
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Populate the basic fields on each document before it is created.

    Every entry of ``docs`` is updated in place and nothing is returned.
    ``repo_type`` is forwarded to the unique-id helper.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # Keep a truthy guid, otherwise generate a NewsML one.
        item[GUID_FIELD] = item.get(GUID_FIELD) or generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)

        # The family id falls back to the item's own guid.
        item.setdefault('family_id', item[GUID_FIELD])

        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, 'draft')

        # ``_id`` falls back to the guid when the caller did not provide one.
        if '_id' not in item:
            item['_id'] = item[GUID_FIELD]

        # A missing or empty operation is recorded as a create.
        item[ITEM_OPERATION] = item.get(ITEM_OPERATION) or ITEM_CREATE
Пример #18
0
def on_create_item(docs, repo_type=ARCHIVE):
    """Give every new document its baseline identifiers and metadata.

    Works in place on each entry of ``docs`` and returns nothing.
    ``repo_type`` selects archive-only behaviour (default source) and is
    forwarded to the unique-id and user-preference helpers.
    """
    for entry in docs:
        update_dates_for(entry)
        set_original_creator(entry)

        # A missing or empty guid is replaced by a generated NewsML guid.
        if not entry.get(GUID_FIELD):
            entry[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in entry:
            generate_unique_id_and_name(entry, repo_type)

        entry.setdefault('family_id', entry[GUID_FIELD])

        if 'event_id' not in entry:
            entry['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(entry, CONTENT_STATE.DRAFT)

        id_field = config.ID_FIELD
        if id_field not in entry:
            entry[id_field] = entry[GUID_FIELD]

        if repo_type == ARCHIVE:
            # Only archive items without an ingest provider get the default source.
            if not entry.get('ingest_provider'):
                set_default_source(entry)

        # Apply the configured default content type when there is no profile yet.
        if 'profile' not in entry:
            configured_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if configured_profile:
                entry['profile'] = configured_profile

        copy_metadata_from_profile(entry)
        copy_metadata_from_user_preferences(entry, repo_type)

        if 'language' not in entry:
            entry['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

        entry[ITEM_OPERATION] = entry.get(ITEM_OPERATION) or ITEM_CREATE
Пример #19
0
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare a fetched item, ingest its associations and store it.

    The item is stored in the resource named by ``feeding_service.service``
    when that attribute exists, otherwise in ``ingest``.  ``item`` is
    updated in place.  Associated items that carry a guid are linked to the
    already-stored item with that guid, or ingested recursively when none
    exists.  An item whose guid is already stored is patched; otherwise it is
    inserted.  Routing is applied only when ``routing_scheme`` is truthy and
    the item is not a repeat of an already stored version.

    :param item: mutable mapping describing the item; must contain the guid field
    :param provider: provider mapping; ``source``, ``content_expiry`` and the
        id field are read from it
    :param feeding_service: object providing ``prepare_href`` and, optionally,
        a ``service`` attribute naming the target resource
    :param rule_set: forwarded to ``apply_rule_set`` and to recursive calls
    :param routing_scheme: optional routing scheme applied after saving
    :return: ``(True, ids)`` with the ids of all stored items on success,
        ``(False, [])`` if any exception was raised
    """
    items_ids = []
    try:
        ingest_collection = feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media (tolerate an explicit None value)
        for assoc in (item.get('associations') or {}).values():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if not guid:
                continue

            ingested = ingest_service.get_from_mongo(req=None, lookup={'guid': guid})
            if ingested.count() >= 1:
                stored = ingested[0]  # index the result once instead of once per rendition
                assoc['_id'] = stored['_id']
                stored_renditions = stored.get('renditions', {})
                if stored_renditions:
                    # The association may arrive without renditions of its own;
                    # indexing assoc['renditions'] directly would raise KeyError.
                    if assoc.get('renditions') is None:
                        assoc['renditions'] = {}
                    for name, rendition in stored_renditions.items():  # add missing renditions
                        assoc['renditions'].setdefault(name, rendition)
            else:  # there is no such item in the system - ingest it
                status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                if status:
                    assoc['_id'] = ids[0]
                    items_ids.extend(ids)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # Merge so that item holds the stored fields overlaid with the new values.
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                raise e

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
Пример #20
0
 def create(self, docs):
     """Set the ingested default state, populate basic fields, then create.

     Returns whatever the parent class ``create`` returns for ``docs``.
     """
     for entry in docs:
         set_default_state(entry, STATE_INGESTED)

     # Must run after the state is set, otherwise the items would become drafts.
     on_create_item(docs)

     created = super().create(docs)
     return created
Пример #21
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None):
    """Prepare a fetched item and store it in the ingest collection.

    The item is stored in the resource named by ``feeding_service.service``
    when that attribute exists, otherwise in ``ingest``.  ``item`` is
    updated in place.  An item whose guid is already stored is patched;
    otherwise it is inserted.  Routing is applied only when
    ``routing_scheme`` is truthy and the item is not a repeat of an already
    stored version.

    :param item: mutable mapping describing the item; must contain the guid field
    :param provider: provider mapping; ``source``, ``content_expiry`` and the
        id field are read from it
    :param feeding_service: object providing ``prepare_href`` and, optionally,
        a ``service`` attribute naming the target resource
    :param rule_set: forwarded to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after saving
    :return: ``(True, items_ids)`` on success, bare ``False`` if any
        exception was raised

    NOTE(review): the failure path returns a plain ``False`` while the
    success path returns a 2-tuple, so a caller that unpacks the result will
    fail on errors - confirm how callers consume the return value.
    """
    try:
        ingest_collection = feeding_service.service if hasattr(
            feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # New item: give it an id (unless one is present) and start its own family.
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        # The configured default is used only when the provider has no
        # 'content_expiry' key at all.
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry',
                         app.config['INGEST_EXPIRY_MINUTES']),
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # A canceled publication status marks the item as killed.
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'],
                                                    rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        items_ids = []
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            # Merge so that item holds the stored fields overlaid with the new values.
            item.update(old_item)
            item.update(updates)
            items_ids = [item['_id']]
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get(
                    'version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids = ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # The error is logged and execution continues with an empty
                # items_ids list.
                logger.error(
                    'Exception while persisting item in %s collection: %s',
                    ingest_collection, e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        return False
    return True, items_ids
Пример #22
0
    def on_create(self, docs):
        """Prepare ingest documents before they are created.

        Each document gets the ingested default state and is passed through
        ``handle_existing_data``; the shared basic fields are then populated.
        """
        for entry in docs:
            set_default_state(entry, STATE_INGESTED)
            handle_existing_data(entry, doc_type='ingest')

        # Must run after the state is set, otherwise the items would become drafts.
        on_create_item(docs)
Пример #23
0
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare a fetched item, ingest its associations and store it.

    The item is stored in the resource named by ``feeding_service.service``
    when that attribute exists, otherwise in ``ingest``.  ``item`` is
    updated in place.  Associated items that carry a guid are linked to the
    already-stored item with that guid, or ingested recursively when none
    exists.  An item whose guid is already stored is patched; otherwise it is
    inserted.  Routing is applied only when ``routing_scheme`` is truthy and
    the item is not a repeat of an already stored version.

    :param item: mutable mapping describing the item; must contain the guid field
    :param provider: provider mapping; ``source``, ``content_expiry`` and the
        id field are read from it
    :param feeding_service: object providing ``prepare_href`` and, optionally,
        a ``service`` attribute naming the target resource
    :param rule_set: forwarded to ``apply_rule_set`` and to recursive calls
    :param routing_scheme: optional routing scheme applied after saving
    :return: ``(True, ids)`` with the ids of all stored items on success,
        ``(False, [])`` if any exception was raised
    """
    items_ids = []
    try:
        ingest_collection = feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the explicit base image, otherwise fall back to any rendition.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media (tolerate an explicit None value)
        for assoc in (item.get('associations') or {}).values():
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if not guid:
                continue

            ingested = ingest_service.get_from_mongo(req=None, lookup={'guid': guid})
            if ingested.count() >= 1:
                stored = ingested[0]  # index the result once instead of once per rendition
                assoc['_id'] = stored['_id']
                stored_renditions = stored.get('renditions', {})
                if stored_renditions:
                    # The association may arrive without renditions of its own;
                    # indexing assoc['renditions'] directly would raise KeyError.
                    if assoc.get('renditions') is None:
                        assoc['renditions'] = {}
                    for name, rendition in stored_renditions.items():  # add missing renditions
                        assoc['renditions'].setdefault(name, rendition)
            else:  # there is no such item in the system - ingest it
                status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                if status:
                    assoc['_id'] = ids[0]
                    items_ids.extend(ids)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # Merge so that item holds the stored fields overlaid with the new values.
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                # The error is logged and execution continues without the new id.
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        return False, []
    return True, items_ids
Пример #24
0
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare a single provider item and store it in the ingest collection.

    The item gets an id and family id, is stamped with provider metadata,
    default state and expiry, has its categories/subjects processed and the
    rule set applied. It is then written over the stored item with the same
    guid, or posted as a new item when none exists. When a routing scheme is
    supplied it is applied to new versions only.

    :param item: the item to ingest; it is modified in place
    :param provider: the ingest provider the item came from
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied to new versions
    :return: ``True`` on success, ``False`` if any exception was raised
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD,
                        generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry',
                         app.config['INGEST_EXPIRY_MINUTES']),
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one
            base_image_rend = rend.get('baseImage') or next(iter(rend.values()))
            if base_image_rend:
                href = providers[provider.get('type')].prepare_href(
                    base_image_rend['href'])
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[
                superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get(
                    'version') == old_item.get('version'):
                new_version = False
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # The exception needs its own %s placeholder; without one the
                # logging call itself fails to format and the message is lost.
                logger.error(
                    "Exception while persisting item in ingest collection: %s",
                    e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # Reporting is best effort: a failure here must not mask the
            # original ingest error. Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass
        return False
    return True
Пример #25
0
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare a single provider item and store it in the ingest collection.

    Looks up an already stored item with the same guid. New items get an id
    and family id. The item is stamped with provider metadata, default state
    and expiry, has its categories/subjects processed and the rule set applied.
    Items whose ``pubstatus`` is ``'canceled'`` are marked killed and passed to
    ``ingest_cancel``. The item is then patched onto the stored one or posted
    as new. When a routing scheme is supplied it is applied to new versions only.

    :param item: the item to ingest; it is modified in place
    :param provider: the ingest provider the item came from
    :param feeding_service: service used to prepare the rendition href
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied to new versions
    :return: ``True`` on success, ``False`` if any exception was raised
    """
    try:
        ingest_service = superdesk.get_resource_service('ingest')

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one
            base_image_rend = rend.get('baseImage') or next(iter(rend.values()))
            if base_image_rend:
                # NOTE(review): 'mimetype' is read from the renditions mapping,
                # not from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(base_image_rend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # item becomes the stored item overlaid with the updates
            item.update(old_item)
            item.update(updates)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # The exception needs its own %s placeholder; without one the
                # logging call itself fails to format and the message is lost.
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # Reporting is best effort: a failure here must not mask the
            # original ingest error. Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass
        return False
    return True
Пример #26
0
    def on_create(self, docs):
        """Default each doc to the ingested state, set its pub status, then run on_create_item."""
        for entry in docs:
            set_default_state(entry, STATE_INGESTED)
            set_pub_status(entry)

        # Must run last: on_create_item would otherwise default the state to draft.
        on_create_item(docs)
Пример #27
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None,
                expiry=None):
    """Ingest a single item together with any associated items it carries.

    The item is looked up by guid in the collection chosen by
    ``get_ingest_collection``. It is stamped with provider metadata, default
    state and expiry, has its categories/subjects processed and the rule set
    applied. Associations that carry a guid are linked to an already ingested
    item or ingested recursively. Finally the item is patched onto the stored
    one or posted as new, and new versions are passed to the routing scheme
    when one is supplied.

    :param item: the item to ingest; it is modified in place
    :param provider: the ingest provider the item came from
    :param feeding_service: service used to pick the collection, prepare
        rendition hrefs and handle cancellations
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied to new versions
    :param expiry: optional expiry to use instead of the computed one; the
        recursive call for associations passes the parent's expiry
    :return: ``(True, items_ids)`` on success, where ``items_ids`` holds the
        ids of recursively ingested associations followed by the id(s) of the
        item itself; ``(False, [])`` if any exception was raised
    """
    items_ids = []
    try:
        ingest_collection = get_ingest_collection(feeding_service, item)
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        item.setdefault("uri", item[GUID_FIELD])  # keep it as original guid

        if item.get("profile"):
            # convert the profile to an ObjectId when possible, otherwise
            # look it up with the value as given
            try:
                item["profile"] = bson.ObjectId(item["profile"])
            except bson.errors.InvalidId:
                pass
            profile = superdesk.get_resource_service("content_types").find_one(
                req=None, _id=item["profile"])
            if not profile:  # unknown profile
                item.pop("profile")

        set_default_state(item, CONTENT_STATE.INGESTED)
        item["expiry"] = (
            get_expiry_date(
                provider.get("content_expiry")
                or app.config["INGEST_EXPIRY_MINUTES"],
                item.get("versioncreated")) if not expiry else expiry
        )  # when fetching associated item set expiry to match parent

        if "anpa_category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            if not app.config.get("INGEST_SKIP_IPTC_CODES", False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if "anpa_category" not in item:
                derive_category(item, provider)
        elif "anpa_category" in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked killed and handed to ingest_cancel
        if item.get("pubstatus", "") == "canceled":
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get("renditions", {})
        if rend:
            # prefer the "baseImage" rendition, otherwise take the first one
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            if baseImageRend and not baseImageRend.get(
                    "media"):  # if there is media should be processed already
                # NOTE(review): "mimetype" is read from the renditions mapping,
                # not from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(baseImageRend["href"],
                                                    rend.get("mimetype"))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get("associations", {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get("guid")
            # label used in log messages only
            assoc_name = assoc.get("headline") or assoc.get("slugline") or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    logger.info("assoc ingested before %s", assoc_name)
                    assoc["_id"] = ingested["_id"]
                    # update expiry so assoc will stay as long as the item using it
                    ingest_service.system_update(ingested["_id"],
                                                 {"expiry": item["expiry"]},
                                                 ingested)
                    if is_new_version(assoc, ingested) and assoc.get(
                            "renditions"):  # new version
                        logger.info(
                            "new assoc version - re-transfer renditions for %s",
                            assoc_name)
                        # a failed transfer is logged but does not abort the ingest
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to update associated item renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    else:
                        logger.info(
                            "same/old version - use already fetched renditions for %s",
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get("renditions") and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            "new association with system renditions - transfer %s",
                            assoc_name)
                        # a failed transfer is logged but does not abort the ingest
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to download renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    # recurse for the association, reusing the parent's expiry;
                    # no routing scheme is passed on
                    status, ids = ingest_item(assoc,
                                              provider,
                                              feeding_service,
                                              rule_set,
                                              expiry=item["expiry"])
                    if status:
                        assoc["_id"] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None,
                                                           _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get("residRef"):
                # no guid, only a reference: replace the association with
                # whatever resolve_ref returns for it
                item["associations"][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            # item becomes the stored item overlaid with the updates
            item.update(old_item)
            item.update(updates)
            items_ids.append(item["_id"])
        else:
            if item.get("ingest_provider_sequence") is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    "Exception while persisting item in %s collection: %s",
                    ingest_collection, e)
                # re-raised so the outer handler reports the failure
                raise e

        # routing is applied to new versions only
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                "routing_schemes").apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        # The returned error object is not used here; presumably creating it
        # records the failure against the provider - TODO confirm.
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
Пример #28
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None):
    """Ingest a single item together with any associated items it carries.

    The item is looked up by guid in the feeding service's collection
    (``'ingest'`` when the service does not name one). It is stamped with
    provider metadata, default state and expiry, has its categories/subjects
    processed and the rule set applied. Associations that carry a guid are
    linked to an already ingested item or ingested recursively. Finally the
    item is patched onto the stored one or posted as new, and new versions are
    passed to the routing scheme when one is supplied.

    :param item: the item to ingest; it is modified in place
    :param provider: the ingest provider the item came from
    :param feeding_service: service used to pick the collection, prepare
        rendition hrefs and handle cancellations
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied to new versions
    :return: ``(True, items_ids)`` on success, where ``items_ids`` holds the
        ids of recursively ingested associations followed by the id(s) of the
        item itself; ``(False, [])`` if any exception was raised
    """
    items_ids = []
    try:
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        item.setdefault('uri', item[GUID_FIELD])  # keep it as original guid

        if item.get('profile'):
            # convert the profile to an ObjectId when possible, otherwise
            # keep the value as given
            try:
                item['profile'] = bson.ObjectId(item['profile'])
            except bson.errors.InvalidId:
                pass

        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry')
            or app.config['INGEST_EXPIRY_MINUTES'], item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one
            base_image_rend = rend.get('baseImage') or next(iter(rend.values()))
            # if there is media should be processed already
            if base_image_rend and not base_image_rend.get('media'):
                # NOTE(review): 'mimetype' is read from the renditions mapping,
                # not from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(base_image_rend['href'],
                                                    rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            # label used in log messages only
            assoc_name = assoc.get('headline') or assoc.get('slugline') or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    # logged only when the association really was found, so
                    # the message is not emitted for brand new associations
                    logger.info('assoc ingested before %s', assoc_name)
                    assoc['_id'] = ingested['_id']
                    if is_new_version(assoc, ingested) and assoc.get(
                            'renditions'):  # new version
                        logger.info(
                            'new assoc version - re-transfer renditions for %s',
                            assoc_name)
                        transfer_renditions(assoc['renditions'])
                    else:
                        logger.info(
                            'same/old version - use already fetched renditions for %s',
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get('renditions') and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            'new association  with system renditions - transfer %s',
                            assoc_name)
                        transfer_renditions(assoc['renditions'])
                    # recurse for the association; no routing scheme is passed on
                    status, ids = ingest_item(assoc, provider, feeding_service,
                                              rule_set)
                    if status:
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None,
                                                           _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get('residRef'):
                # no guid, only a reference: replace the association with
                # whatever resolve_ref returns for it
                item['associations'][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            # item becomes the stored item overlaid with the updates
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    'Exception while persisting item in %s collection: %s',
                    ingest_collection, e)
                # re-raised so the outer handler reports the failure
                raise

        # routing is applied to new versions only
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        # The returned error object is not used here; presumably creating it
        # records the failure against the provider - TODO confirm.
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids