Exemplo n.º 1
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None,
                expiry=None):
    """Ingest ``item`` from ``provider``, together with its associations.

    The item dict is enriched in place (provider id, source, uri, profile,
    default state, expiry, categories/subjects, rule set, renditions). Each
    association carrying a ``guid`` is either linked to the already ingested
    item with that guid or ingested through a recursive call. Finally the
    item is patched over the stored item with the same guid, or posted as a
    new item when none exists.

    :param item: item dict to ingest; mutated in place.
    :param provider: provider dict; its id, ``source`` and ``content_expiry``
        are read here and it is passed on to the helper functions.
    :param feeding_service: passed to ``get_ingest_collection`` and
        ``ingest_cancel``; its ``prepare_href`` is used for renditions.
    :param rule_set: forwarded to ``apply_rule_set`` and to the recursive
        calls made for associations.
    :param routing_scheme: when truthy and the item is a new version, it is
        applied to the stored item via the ``routing_schemes`` service.
    :param expiry: when truthy, used as the item's expiry instead of a
        computed one; recursive calls pass the parent item's expiry here.
    :return: ``(True, ids)`` on success, where ``ids`` holds the ids collected
        from newly ingested associations followed by the id(s) of the item
        itself; ``(False, [])`` if any exception was raised (the exception is
        logged and reported through ``ProviderError.ingestItemError``).
    """
    items_ids = []
    try:
        ingest_collection = get_ingest_collection(feeding_service, item)
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # new item: make sure it has an id and use that id as its family id
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        item.setdefault("uri", item[GUID_FIELD])  # keep it as original guid

        if item.get("profile"):
            # try to convert the profile to an ObjectId; if that is not
            # possible the original value is used for the lookup below
            try:
                item["profile"] = bson.ObjectId(item["profile"])
            except bson.errors.InvalidId:
                pass
            profile = superdesk.get_resource_service("content_types").find_one(
                req=None, _id=item["profile"])
            if not profile:  # unknown profile
                item.pop("profile")

        set_default_state(item, CONTENT_STATE.INGESTED)
        # explicit expiry wins; otherwise compute it from the provider's
        # content_expiry (or the INGEST_EXPIRY_MINUTES config) and versioncreated
        item["expiry"] = (
            get_expiry_date(
                provider.get("content_expiry")
                or app.config["INGEST_EXPIRY_MINUTES"],
                item.get("versioncreated")) if not expiry else expiry
        )  # when fetching associated item set expiry to match parent

        if "anpa_category" in item:
            process_anpa_category(item, provider)

        # subject present: process IPTC codes (unless disabled by config) and
        # derive a category if missing; only a category present: derive subject
        if "subject" in item:
            if not app.config.get("INGEST_SKIP_IPTC_CODES", False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if "anpa_category" not in item:
                derive_category(item, provider)
        elif "anpa_category" in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # canceled items are marked as killed and handed to ingest_cancel
        if item.get("pubstatus", "") == "canceled":
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get("renditions", {})
        if rend:
            # prefer the "baseImage" rendition, else fall back to the first one
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            if baseImageRend and not baseImageRend.get(
                    "media"):  # if there is media should be processed already
                href = feeding_service.prepare_href(baseImageRend["href"],
                                                    rend.get("mimetype"))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get("associations", {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get("guid")
            # human readable label, used only in the log messages below
            assoc_name = assoc.get("headline") or assoc.get("slugline") or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    logger.info("assoc ingested before %s", assoc_name)
                    assoc["_id"] = ingested["_id"]
                    # update expiry so assoc will stay as long as the item using it
                    ingest_service.system_update(ingested["_id"],
                                                 {"expiry": item["expiry"]},
                                                 ingested)
                    if is_new_version(assoc, ingested) and assoc.get(
                            "renditions"):  # new version
                        logger.info(
                            "new assoc version - re-transfer renditions for %s",
                            assoc_name)
                        # a failed transfer is logged but does not abort the ingest
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to update associated item renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    else:
                        logger.info(
                            "same/old version - use already fetched renditions for %s",
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get("renditions") and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            "new association with system renditions - transfer %s",
                            assoc_name)
                        # a failed download is logged but does not abort the ingest
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to download renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    # ingest the association itself, sharing the parent's expiry;
                    # note that routing_scheme is not forwarded
                    status, ids = ingest_item(assoc,
                                              provider,
                                              feeding_service,
                                              rule_set,
                                              expiry=item["expiry"])
                    if status:
                        # the first returned id is used as the association's id
                        assoc["_id"] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None,
                                                           _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get("residRef"):
                # no guid but a reference: replace the association with the
                # result of resolve_ref
                item["associations"][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            # patch the stored item with a copy of the incoming one
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            # merge stored fields into item; values from updates take precedence
            item.update(old_item)
            item.update(updates)
            items_ids.append(item["_id"])
        else:
            if item.get("ingest_provider_sequence") is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    "Exception while persisting item in %s collection: %s",
                    ingest_collection, e)
                raise e

        # routing is applied to the item as re-read from the ingest service,
        # and only for new versions
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                "routing_schemes").apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        # any failure is logged and reported against the provider; callers
        # only see the (False, []) result
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
Exemplo n.º 2
0
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Ingest ``item`` from ``provider``, together with its associations.

    The item dict is enriched in place (provider id, source, default state,
    expiry, categories/subjects, rule set, renditions). Each association
    carrying a ``guid`` is linked to the already ingested item with that guid
    or ingested through a recursive call. Finally the item is patched over
    the stored item with the same guid, or posted as a new item.

    :param item: item dict to ingest; mutated in place.
    :param provider: provider dict; its id, ``source`` and ``content_expiry``
        are read here and it is passed on to the helper functions.
    :param feeding_service: its ``service`` attribute, when present, names the
        ingest collection (``'ingest'`` otherwise); also used for
        ``prepare_href`` and ``ingest_cancel``.
    :param rule_set: forwarded to ``apply_rule_set`` and to the recursive
        calls made for associations.
    :param routing_scheme: when truthy and the item is a new version, it is
        applied to the stored item via the ``routing_schemes`` service.
    :return: ``(True, ids)`` on success, where ``ids`` holds the ids collected
        from newly ingested associations followed by the id(s) of the item
        itself; ``(False, [])`` if any exception was raised (the exception is
        logged and reported through ``ProviderError.ingestItemError``).
    """
    items_ids = []
    try:
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # new item: make sure it has an id and use that id as its family id
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # subject present: process IPTC codes (unless disabled by config) and
        # derive a category if missing; only a category present: derive subject
        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # canceled items are marked as killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, else fall back to the first one
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if guid:
                lookup = {'guid': guid}
                ingested = ingest_service.get_from_mongo(req=None, lookup=lookup)
                if ingested.count() >= 1:
                    # read the first match once instead of re-indexing the result
                    existing = ingested[0]
                    assoc['_id'] = existing['_id']
                    existing_renditions = existing.get('renditions') or {}
                    if existing_renditions:
                        # the association may arrive without any renditions at
                        # all; create the dict so the missing ones can be added
                        # instead of failing the whole ingest with a KeyError
                        if assoc.get('renditions') is None:
                            assoc['renditions'] = {}
                        for name, rendition in existing_renditions.items():  # add missing renditions
                            assoc['renditions'].setdefault(name, rendition)
                else:  # there is no such item in the system - ingest it
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        # the first returned id is used as the association's id
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)

        new_version = True
        if old_item:
            # patch the stored item with a copy of the incoming one
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # merge stored fields into item; values from updates take precedence
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                raise

        # routing is applied to the item as re-read from the ingest service,
        # and only for new versions
        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        # any failure is logged and reported against the provider; callers
        # only see the (False, []) result
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
Exemplo n.º 3
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None):
    """Ingest ``item`` from ``provider``, together with its associations.

    The item dict is enriched in place (provider id, source, uri, profile,
    default state, expiry, categories/subjects, rule set, renditions). Each
    association carrying a ``guid`` is either linked to the already ingested
    item with that guid or ingested through a recursive call. Finally the
    item is patched over the stored item with the same guid, or posted as a
    new item when none exists.

    :param item: item dict to ingest; mutated in place.
    :param provider: provider dict; its id, ``source`` and ``content_expiry``
        are read here and it is passed on to the helper functions.
    :param feeding_service: its ``service`` attribute, when present, names the
        ingest collection (``'ingest'`` otherwise); also used for
        ``prepare_href`` and ``ingest_cancel``.
    :param rule_set: forwarded to ``apply_rule_set`` and to the recursive
        calls made for associations.
    :param routing_scheme: when truthy and the item is a new version, it is
        applied to the stored item via the ``routing_schemes`` service.
    :return: ``(True, ids)`` on success, where ``ids`` holds the ids collected
        from newly ingested associations followed by the id(s) of the item
        itself; ``(False, [])`` if any exception was raised (the exception is
        logged and reported through ``ProviderError.ingestItemError``).
    """
    items_ids = []
    try:
        ingest_collection = feeding_service.service if hasattr(
            feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # new item: make sure it has an id and use that id as its family id
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        item.setdefault('uri', item[GUID_FIELD])  # keep it as original guid

        if item.get('profile'):
            # try to convert the profile to an ObjectId; keep the original
            # value when it is not a valid id
            try:
                item['profile'] = bson.ObjectId(item['profile'])
            except bson.errors.InvalidId:
                pass

        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry')
            or app.config['INGEST_EXPIRY_MINUTES'], item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # subject present: process IPTC codes (unless disabled by config) and
        # derive a category if missing; only a category present: derive subject
        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # canceled items are marked as killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, else fall back to the first one
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend and not baseImageRend.get(
                    'media'):  # if there is media should be processed already
                href = feeding_service.prepare_href(baseImageRend['href'],
                                                    rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            # human readable label, used only in the log messages below
            assoc_name = assoc.get('headline') or assoc.get('slugline') or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    # only report "ingested before" when a stored item was
                    # actually found for this guid
                    logger.info('assoc ingested before %s', assoc_name)
                    assoc['_id'] = ingested['_id']
                    if is_new_version(assoc, ingested) and assoc.get(
                            'renditions'):  # new version
                        logger.info(
                            'new assoc version - re-transfer renditions for %s',
                            assoc_name)
                        transfer_renditions(assoc['renditions'])
                    else:
                        logger.info(
                            'same/old version - use already fetched renditions for %s',
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get('renditions') and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            'new association  with system renditions - transfer %s',
                            assoc_name)
                        transfer_renditions(assoc['renditions'])
                    # ingest the association itself; routing_scheme is not
                    # forwarded to the recursive call
                    status, ids = ingest_item(assoc, provider, feeding_service,
                                              rule_set)
                    if status:
                        # the first returned id is used as the association's id
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None,
                                                           _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get('residRef'):
                # no guid but a reference: replace the association with the
                # result of resolve_ref
                item['associations'][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            # patch the stored item with a copy of the incoming one
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            # merge stored fields into item; values from updates take precedence
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    'Exception while persisting item in %s collection: %s',
                    ingest_collection, e)
                raise

        # routing is applied to the item as re-read from the ingest service,
        # and only for new versions
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        # any failure is logged and reported against the provider; callers
        # only see the (False, []) result
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids