def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. ``repo_type`` is forwarded to the unique-id generator and to the
    user-preferences metadata copy.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault('family_id', item[GUID_FIELD])
        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])
        copy_metadata_from_user_preferences(item, repo_type)

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. ``repo_type`` is forwarded to the unique-id generator and to the
    dateline, byline and sign-off setters.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if "unique_id" not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault("family_id", item[GUID_FIELD])
        if "event_id" not in item:
            item["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])

        set_dateline(item, repo_type)
        set_byline(item, repo_type)
        set_sign_off(item, repo_type=repo_type)

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. For the archive repo a default source is set on documents that have
    no ``ingest_provider``; the configured ``DEFAULT_CONTENT_TYPE`` becomes
    the profile of documents that have none.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault('family_id', item[GUID_FIELD])
        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])

        # set the source for the article
        if repo_type == ARCHIVE and not item.get('ingest_provider'):
            set_default_source(item)

        copy_metadata_from_user_preferences(item, repo_type)

        if 'profile' not in item:
            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if default_profile:
                item['profile'] = default_profile

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE
def create(self, docs):
    """Flag every doc as ingested, populate its basic fields and delegate to the parent ``create``."""
    for entry in docs:
        set_default_state(entry, STATE_INGESTED)

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs)
    return super().create(docs)
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it through the ``ingest`` service.

    The item is mutated in place (ids, provider reference, source, state,
    expiry, category/subject processing, rule set, renditions). If an item
    with the same guid is already stored, it is replaced and its id is kept;
    otherwise the item is posted as new. When ``routing_scheme`` is given,
    the stored item is handed to the routing scheme service afterwards.

    :param item: item dict to ingest
    :param provider: ingest provider dict the item came from
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after persisting
    :return: ``True`` on success, ``False`` when any step raised (the error is logged)
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # derive whichever of subject/category is missing from the other one
        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href, old_item)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the message needs a placeholder for the exception argument,
                # otherwise logging fails to format the record
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # reporting to sentry is best effort and must not mask the ingest failure
            pass
        return False
    return True
def on_create(self, docs):
    """Flag every doc as ingested, reconcile it with existing data and populate its basic fields."""
    for entry in docs:
        set_default_state(entry, STATE_INGESTED)
        handle_existing_data(entry, doc_type='ingest')

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs, repo_type='ingest')
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it through the ``ingest`` service.

    The item is mutated in place (ids, provider reference, source, state,
    expiry, category/subject processing, rule set, renditions). If an item
    with the same guid is already stored, it is replaced and its id is kept;
    otherwise the item is posted as new. When ``routing_scheme`` is given,
    the stored item is handed to the routing scheme service afterwards.

    :param item: item dict to ingest
    :param provider: ingest provider dict the item came from
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after persisting
    :return: ``True`` on success, ``False`` when any step raised (the error is logged)
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get("type")].provider = provider

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        set_default_state(item, STATE_INGESTED)
        item["expiry"] = get_expiry_date(
            provider.get("content_expiry", INGEST_EXPIRY_MINUTES), item.get("versioncreated")
        )

        if "anpa-category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            process_iptc_codes(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service("ingest")

        if item.get("ingest_provider_sequence") is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get("renditions", {})
        if rend:
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get("type")].prepare_href(baseImageRend["href"])
                update_renditions(item, href)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the message needs a placeholder for the exception argument,
                # otherwise logging fails to format the record
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service("routing_schemes").apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # reporting to sentry is best effort and must not mask the ingest failure
            pass
        return False
    return True
def on_create(self, docs):
    """Prepare docs for the ingest repo before they are created.

    Every doc gets the ingested state; default priority and urgency are only
    applied when no ``DEFAULT_CONTENT_TYPE`` is configured. Afterwards the
    doc is reconciled with existing data and its word count is updated.
    """
    for entry in docs:
        set_default_state(entry, CONTENT_STATE.INGESTED)

        has_default_content_type = app.config.get('DEFAULT_CONTENT_TYPE', None)
        if not has_default_content_type:
            entry.setdefault(ITEM_PRIORITY, int(config.DEFAULT_PRIORITY_VALUE_FOR_INGESTED_ARTICLES))
            entry.setdefault(ITEM_URGENCY, int(config.DEFAULT_URGENCY_VALUE_FOR_INGESTED_ARTICLES))

        handle_existing_data(entry, doc_type='ingest')
        update_word_count(entry)

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs, repo_type='ingest')
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. No ``event_id`` is generated for the ingest repo, and a default
    source is only set for the archive repo. A missing language falls back to
    ``DEFAULT_LANGUAGE`` and is then overridden by the language of the
    document's desk, if that desk defines one. Documents referencing a
    template are rendered from it at the end.
    """
    for item in docs:
        editor_utils.generate_fields(item)
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if "unique_id" not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault("family_id", item[GUID_FIELD])
        if repo_type != "ingest" and "event_id" not in item:
            item["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])

        # set the source for the article
        if repo_type == ARCHIVE:
            set_default_source(item)

        if "profile" not in item:
            default_profile = app.config.get("DEFAULT_CONTENT_TYPE", None)
            if default_profile:
                item["profile"] = default_profile

        copy_metadata_from_profile(item)
        copy_metadata_from_user_preferences(item, repo_type)

        if "language" not in item:
            item["language"] = app.config.get("DEFAULT_LANGUAGE", "en")

            # the desk language, when defined, wins over the configured default
            task = item.get("task", None)
            desk_id = task.get("desk", None) if task else None
            if desk_id:
                desk = superdesk.get_resource_service("desks").find_one(req=None, _id=desk_id)
                if desk and desk.get("desk_language", None):
                    item["language"] = desk["desk_language"]

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE

        template_id = item.get("template")
        if template_id:
            from apps.templates.content_templates import render_content_template_by_id  # avoid circular import

            item.pop("fields_meta", None)
            render_content_template_by_id(item, template_id, update=True)
            editor_utils.generate_fields(item)
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. No ``event_id`` is generated for the ingest repo, and a default
    source is only set for the archive repo. A missing language falls back to
    ``DEFAULT_LANGUAGE`` and is then overridden by the language of the
    document's desk, if that desk defines one. Documents referencing a
    template are rendered from it at the end.
    """
    for item in docs:
        editor_utils.generate_fields(item)
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault('family_id', item[GUID_FIELD])
        if repo_type != 'ingest' and 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])

        # set the source for the article
        if repo_type == ARCHIVE:
            set_default_source(item)

        if 'profile' not in item:
            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if default_profile:
                item['profile'] = default_profile

        copy_metadata_from_profile(item)
        copy_metadata_from_user_preferences(item, repo_type)

        if 'language' not in item:
            item['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

            # the desk language, when defined, wins over the configured default
            task = item.get('task', None)
            desk_id = task.get('desk', None) if task else None
            if desk_id:
                desk = superdesk.get_resource_service('desks').find_one(req=None, _id=desk_id)
                if desk and desk.get('desk_language', None):
                    item['language'] = desk['desk_language']

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE

        template_id = item.get('template')
        if template_id:
            from apps.templates.content_templates import render_content_template_by_id  # avoid circular import

            item.pop('fields_meta', None)
            render_content_template_by_id(item, template_id, update=True)
            editor_utils.generate_fields(item)
def on_create_item(docs):
    """Fill in dates, creator, guid, unique id, state and ``_id`` on every document in ``docs``.

    Documents are mutated in place; a guid, unique id, state or ``_id`` that
    is already present is kept.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        needs_guid = not item.get('guid')
        if needs_guid:
            item['guid'] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in item:
            generate_unique_id_and_name(item)

        set_default_state(item, 'draft')
        # the guid doubles as the id when none was supplied
        item.setdefault('_id', item['guid'])
def on_create(self, docs):
    """Prepare docs for the ingest repo before they are created.

    Every doc gets the ingested state plus default priority and urgency, is
    reconciled with existing data and has its word count updated.
    """
    for entry in docs:
        set_default_state(entry, CONTENT_STATE.INGESTED)
        entry.setdefault(ITEM_PRIORITY, int(config.DEFAULT_PRIORITY_VALUE_FOR_INGESTED_ARTICLES))
        entry.setdefault(ITEM_URGENCY, int(config.DEFAULT_URGENCY_VALUE_FOR_INGESTED_ARTICLES))
        handle_existing_data(entry, doc_type='ingest')
        update_word_count(entry)

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs, repo_type='ingest')
def ingest_item(item, provider, rule_set=None):
    """Prepare ``item`` received from ``provider`` and persist it through the ``ingest`` service.

    The item is mutated in place (``_id``, provider reference, source, state,
    category processing, rule set, provider sequence, renditions). An item
    whose guid is already stored is replaced with ``put``; otherwise it is
    posted, falling back to ``put`` when the post raises ``HTTPException``.

    :param item: item dict to ingest; must contain ``guid``
    :param provider: ingest provider dict the item came from
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :raises ProviderError: re-raised unchanged; any other exception is wrapped
        with ``ProviderError.ingestError``
    """
    try:
        item.setdefault('_id', item['guid'])
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider['_id'])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)

        if 'anpa-category' in item:
            process_anpa_category(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href)

        old_item = ingest_service.find_one(_id=item['guid'], req=None)

        if old_item:
            ingest_service.put(item['guid'], item)
        else:
            try:
                ingest_service.post([item])
            except HTTPException as e:
                # the message needs a placeholder for the exception argument,
                # otherwise logging fails to format the record
                logger.error("Exception while persisting item in ingest collection: %s", e)
                ingest_service.put(item['guid'], item)
    except ProviderError:
        raise
    except Exception as ex:
        raise ProviderError.ingestError(ex, provider)
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. No ``event_id`` is generated for the ingest repo, and a default
    source is only set for the archive repo. A missing language falls back to
    ``DEFAULT_LANGUAGE`` and is then overridden by the language of the
    document's desk, if that desk defines one.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault('family_id', item[GUID_FIELD])
        if repo_type != 'ingest' and 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])

        # set the source for the article
        if repo_type == ARCHIVE:
            set_default_source(item)

        if 'profile' not in item:
            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if default_profile:
                item['profile'] = default_profile

        copy_metadata_from_profile(item)
        copy_metadata_from_user_preferences(item, repo_type)

        if 'language' not in item:
            item['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

            # the desk language, when defined, wins over the configured default
            task = item.get('task', None)
            desk_id = task.get('desk', None) if task else None
            if desk_id:
                desk = superdesk.get_resource_service('desks').find_one(req=None, _id=desk_id)
                if desk and desk.get('desk_language', None):
                    item['language'] = desk['desk_language']

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE
def ingest_item(item, provider, rule_set=None):
    """Prepare ``item`` received from ``provider`` and persist it through the ``ingest`` service.

    The item is mutated in place (``_id``, provider reference, source, state,
    category processing, rule set, provider sequence, renditions). An item
    whose guid is already stored is replaced with ``put``; otherwise it is
    posted, falling back to ``put`` when the post raises ``HTTPException``.

    :param item: item dict to ingest; must contain ``guid``
    :param provider: ingest provider dict the item came from
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :raises ProviderError: re-raised unchanged; any other exception is wrapped
        with ``ProviderError.ingestError``
    """
    try:
        item.setdefault('_id', item['guid'])
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider['_id'])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)

        if 'anpa-category' in item:
            process_anpa_category(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href)

        old_item = ingest_service.find_one(_id=item['guid'], req=None)

        if old_item:
            ingest_service.put(item['guid'], item)
        else:
            try:
                ingest_service.post([item])
            except HTTPException as e:
                # the message needs a placeholder for the exception argument,
                # otherwise logging fails to format the record
                logger.error("Exception while persisting item in ingest collection: %s", e)
                ingest_service.put(item['guid'], item)
    except ProviderError:
        raise
    except Exception as ex:
        raise ProviderError.ingestError(ex, provider)
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. ``repo_type`` is forwarded to the unique-id generator.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault('family_id', item[GUID_FIELD])
        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, 'draft')
        item.setdefault('_id', item[GUID_FIELD])

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE
def on_create_item(docs, repo_type=ARCHIVE):
    """Fill in the baseline fields of every document in ``docs``.

    Documents are mutated in place and values that are already present are
    kept. For the archive repo a default source is set on documents that have
    no ``ingest_provider``. The configured ``DEFAULT_CONTENT_TYPE`` becomes
    the profile and ``DEFAULT_LANGUAGE`` the language of documents lacking
    them.
    """
    for item in docs:
        update_dates_for(item)
        set_original_creator(item)

        # identifiers: guid first, the others are derived from it
        if not item.get(GUID_FIELD):
            item[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
        if 'unique_id' not in item:
            generate_unique_id_and_name(item, repo_type)
        item.setdefault('family_id', item[GUID_FIELD])
        if 'event_id' not in item:
            item['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(item, CONTENT_STATE.DRAFT)
        item.setdefault(config.ID_FIELD, item[GUID_FIELD])

        # set the source for the article
        if repo_type == ARCHIVE and not item.get('ingest_provider'):
            set_default_source(item)

        if 'profile' not in item:
            default_profile = app.config.get('DEFAULT_CONTENT_TYPE', None)
            if default_profile:
                item['profile'] = default_profile

        copy_metadata_from_profile(item)
        copy_metadata_from_user_preferences(item, repo_type)

        if 'language' not in item:
            item['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

        if not item.get(ITEM_OPERATION):
            item[ITEM_OPERATION] = ITEM_CREATE
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it in the ingest collection.

    The collection is ``feeding_service.service`` when the feeding service
    defines it, ``'ingest'`` otherwise. The item is mutated in place. An item
    whose guid is already stored is patched; otherwise it is posted as new.
    Associated items that are not stored yet are ingested recursively. The
    routing scheme, when given, is only applied to new versions.

    :param item: item dict to ingest; must contain the guid field
    :param provider: ingest provider dict the item came from
    :param feeding_service: feeding service used to prepare rendition hrefs
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after persisting
    :return: ``(True, ids)`` with the ids of all stored items on success,
        ``(False, [])`` when any step raised (the error is logged)
    """
    items_ids = []
    try:
        ingest_collection = feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # derive whichever of subject/category is missing from the other one
        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])

            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if guid:
                lookup = {'guid': guid}
                ingested = ingest_service.get_from_mongo(req=None, lookup=lookup)
                if ingested.count() >= 1:
                    assoc['_id'] = ingested[0]['_id']
                    for rendition in ingested[0].get('renditions', {}):  # add missing renditions
                        # an association may arrive without any renditions of its own;
                        # without this guard the lookup below raised KeyError and
                        # aborted the ingest of the whole item
                        assoc.setdefault('renditions', {})
                        assoc['renditions'].setdefault(rendition, ingested[0]['renditions'][rendition])
                else:  # there is no such item in the system - ingest it
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                raise

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        # NOTE(review): the error object built here is not raised; presumably the
        # call is made for its side effects - confirm against ProviderError
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
def create(self, docs):
    """Flag every doc as ingested, populate its basic fields and delegate to the parent ``create``."""
    for entry in docs:
        set_default_state(entry, STATE_INGESTED)

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs)
    return super().create(docs)
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it in the ingest collection.

    The collection is ``feeding_service.service`` when the feeding service
    defines it, ``'ingest'`` otherwise. The item is mutated in place. An item
    whose guid is already stored is patched; otherwise it is posted as new.
    The routing scheme, when given, is only applied to new versions.

    :return: ``(True, ids)`` on success, bare ``False`` when any step raised.

    NOTE(review): the failure and success return values have different
    shapes; callers that unpack the result will fail on ``False`` - confirm
    how callers consume it before unifying.
    """
    try:
        collection_name = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(collection_name)
        id_field = superdesk.config.ID_FIELD

        # determine if we already have this item
        stored = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not stored:
            item.setdefault(id_field, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[id_field]

        item['ingest_provider'] = str(provider[id_field])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)

        expiry_minutes = provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES'])
        item['expiry'] = get_expiry_date(expiry_minutes, item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # derive whichever of subject/category is missing from the other one
        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        renditions = item.get('renditions', {})
        if renditions:
            base_rendition = renditions.get('baseImage') or next(iter(renditions.values()))
            if base_rendition:
                href = feeding_service.prepare_href(base_rendition['href'], renditions.get('mimetype'))
                update_renditions(item, href, stored)

        items_ids = []
        if stored:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(stored[id_field], updates, stored)
            item.update(stored)
            item.update(updates)
            items_ids = [item['_id']]
            # if the feed is versioned and this is not a new version
            same_version = (
                'version' in item
                and 'version' in stored
                and item.get('version') == stored.get('version')
            )
            new_version = not same_version
        else:
            new_version = True
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids = ingest_service.post_in_mongo([item])
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', collection_name, e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[id_field], req=None)
            routing_service = superdesk.get_resource_service('routing_schemes')
            routing_service.apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        return False
    return True, items_ids
def on_create(self, docs):
    """Flag every doc as ingested, reconcile it with existing data and populate its basic fields."""
    for entry in docs:
        set_default_state(entry, STATE_INGESTED)
        handle_existing_data(entry, doc_type='ingest')

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs)
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it in the ingest collection.

    The collection is ``feeding_service.service`` when the feeding service
    defines it, ``'ingest'`` otherwise. The item is mutated in place. An item
    whose guid is already stored is patched; otherwise it is posted as new.
    Associated items that are not stored yet are ingested recursively. The
    routing scheme, when given, is only applied to new versions.

    :param item: item dict to ingest; must contain the guid field
    :param provider: ingest provider dict the item came from
    :param feeding_service: feeding service used to prepare rendition hrefs
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after persisting
    :return: ``(True, ids)`` with the ids of all stored items on success,
        ``(False, [])`` when any step raised (the error is logged)
    """
    items_ids = []
    try:
        ingest_collection = feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # derive whichever of subject/category is missing from the other one
        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])

            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if guid:
                lookup = {'guid': guid}
                ingested = ingest_service.get_from_mongo(req=None, lookup=lookup)
                if ingested.count() >= 1:
                    assoc['_id'] = ingested[0]['_id']
                    for rendition in ingested[0].get('renditions', {}):  # add missing renditions
                        # an association may arrive without any renditions of its own;
                        # without this guard the lookup below raised KeyError and
                        # aborted the ingest of the whole item
                        assoc.setdefault('renditions', {})
                        assoc['renditions'].setdefault(rendition, ingested[0]['renditions'][rendition])
                else:  # there is no such item in the system - ingest it
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        return False, []
    return True, items_ids
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it through the ``ingest`` service.

    The item is mutated in place (ids, provider reference, source, state,
    expiry, category/subject processing, rule set, renditions). If an item
    with the same guid is already stored, it is replaced and its id is kept;
    otherwise the item is posted as new. The routing scheme, when given, is
    skipped for a stored item whose ``version`` did not change.

    :param item: item dict to ingest
    :param provider: ingest provider dict the item came from
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after persisting
    :return: ``True`` on success, ``False`` when any step raised (the error is logged)
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # derive whichever of subject/category is missing from the other one
        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the message needs a placeholder for the exception argument,
                # otherwise logging fails to format the record
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # reporting to sentry is best effort and must not mask the ingest failure
            pass
        return False
    return True
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Prepare ``item`` received from ``provider`` and persist it through the ``ingest`` service.

    The item is mutated in place. An item whose guid is already stored is
    patched; otherwise it is posted as new. The routing scheme, when given,
    is skipped for a stored item whose ``version`` did not change.

    :param item: item dict to ingest; must contain the guid field
    :param provider: ingest provider dict the item came from
    :param feeding_service: feeding service used to prepare rendition hrefs
    :param rule_set: optional rule set handed to ``apply_rule_set``
    :param routing_scheme: optional routing scheme applied after persisting
    :return: ``True`` on success, ``False`` when any step raised (the error is logged)
    """
    try:
        ingest_service = superdesk.get_resource_service('ingest')

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        # derive whichever of subject/category is missing from the other one
        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the message needs a placeholder for the exception argument,
                # otherwise logging fails to format the record
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # reporting to sentry is best effort and must not mask the ingest failure
            pass
        return False
    return True
def on_create(self, docs):
    """Flag every doc as ingested, set its pub status and populate its basic fields."""
    for entry in docs:
        set_default_state(entry, STATE_INGESTED)
        set_pub_status(entry)

    # on_create_item has to run after the state is set,
    # otherwise it would make the docs drafts
    on_create_item(docs)
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None, expiry=None):
    """Store a parsed item, together with its associated items, in an ingest collection.

    The target collection name comes from ``get_ingest_collection``. The item
    is looked up by its guid: when a stored item is found it is patched,
    otherwise the item is inserted as a new document. ``item`` is updated in
    place along the way (provider id, source, uri, profile, state, expiry,
    category/subject processing, rule set, renditions, associations).

    Associations that carry a ``guid`` are linked to the already ingested item
    with that guid, or ingested by calling this function recursively with the
    parent expiry. Associations without a guid but with ``residRef`` are
    replaced by the result of ``resolve_ref``.

    :param item: dict with the parsed item, must contain ``GUID_FIELD``
    :param provider: ingest provider dict; its id, ``source`` and
        ``content_expiry`` are read here
    :param feeding_service: feeding service, used to pick the collection,
        prepare the rendition href and handle canceled items
    :param rule_set: optional rule set handed over to ``apply_rule_set``
    :param routing_scheme: optional routing scheme, applied only when the item
        is new or ``is_new_version`` reports a new version
    :param expiry: optional expiry used as is instead of the computed one
    :return: ``(True, items_ids)`` on success, where ``items_ids`` holds the ids
        collected from ingested associations followed by the id(s) of the item
        itself; ``(False, [])`` when any step raised (the error is logged and
        handed to ``ProviderError.ingestItemError``)
    """
    items_ids = []
    try:
        ingest_collection = get_ingest_collection(feeding_service, item)
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # first time this guid is seen - it gets an id and starts its own family
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        item.setdefault("uri", item[GUID_FIELD])  # keep it as original guid

        if item.get("profile"):
            # a value which is not a valid object id is kept unchanged for the lookup
            try:
                item["profile"] = bson.ObjectId(item["profile"])
            except bson.errors.InvalidId:
                pass
            profile = superdesk.get_resource_service("content_types").find_one(
                req=None, _id=item["profile"])
            if not profile:  # unknown profile
                item.pop("profile")

        set_default_state(item, CONTENT_STATE.INGESTED)
        # provider expiry wins over the configured default, an explicit
        # ``expiry`` argument wins over both
        item["expiry"] = (
            get_expiry_date(
                provider.get("content_expiry") or app.config["INGEST_EXPIRY_MINUTES"],
                item.get("versioncreated"))
            if not expiry
            else expiry
        )  # when fetching associated item set expiry to match parent

        if "anpa_category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            if not app.config.get("INGEST_SKIP_IPTC_CODES", False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if "anpa_category" not in item:
                derive_category(item, provider)
        elif "anpa_category" in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get("pubstatus", "") == "canceled":
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get("renditions", {})
        if rend:
            # prefer the base image, otherwise take whatever rendition comes first
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            if baseImageRend and not baseImageRend.get(
                    "media"):  # if there is media should be processed already
                # NOTE(review): mimetype is looked up on the renditions mapping,
                # not on the picked rendition - confirm this is intended
                href = feeding_service.prepare_href(baseImageRend["href"], rend.get("mimetype"))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get("associations", {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get("guid")
            # name is only used for logging
            assoc_name = assoc.get("headline") or assoc.get("slugline") or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    logger.info("assoc ingested before %s", assoc_name)
                    assoc["_id"] = ingested["_id"]
                    # update expiry so assoc will stay as long as the item using it
                    ingest_service.system_update(ingested["_id"], {"expiry": item["expiry"]}, ingested)
                    if is_new_version(assoc, ingested) and assoc.get(
                            "renditions"):  # new version
                        logger.info(
                            "new assoc version - re-transfer renditions for %s",
                            assoc_name)
                        # a failed transfer is logged only, the parent item is still ingested
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to update associated item renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    else:
                        logger.info(
                            "same/old version - use already fetched renditions for %s",
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get("renditions") and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            "new association with system renditions - transfer %s",
                            assoc_name)
                        # a failed transfer is logged only, the association is still ingested
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to download renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    # recursive call without routing scheme, using the parent expiry
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set, expiry=item["expiry"])
                    if status:
                        # NOTE(review): relies on ids[0] being the id of the association itself
                        # when it has no nested associations - confirm
                        assoc["_id"] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None, _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get("residRef"):
                # no guid - replace the association with the resolved reference
                item["associations"][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # stored values first, then the fresh ones on top of them
            item.update(old_item)
            item.update(updates)
            items_ids.append(item["_id"])
        else:
            if item.get("ingest_provider_sequence") is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    "Exception while persisting item in %s collection: %s",
                    ingest_collection, e)
                # re-raised so the outer handler reports the failure
                raise e

        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                "routing_schemes").apply_routing_scheme(
                routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Store a parsed item, together with its associated items, in an ingest collection.

    The target collection is ``feeding_service.service`` when the feeding
    service has such an attribute, ``'ingest'`` otherwise. The item is looked
    up by its guid: when a stored item is found it is patched, otherwise the
    item is inserted as a new document. ``item`` is updated in place along the
    way (provider id, source, uri, profile, state, expiry, category/subject
    processing, rule set, renditions, associations).

    Associations that carry a ``guid`` are linked to the already ingested item
    with that guid, or ingested by calling this function recursively.
    Associations without a guid but with ``residRef`` are replaced by the
    result of ``resolve_ref``.

    :param item: dict with the parsed item, must contain ``GUID_FIELD``
    :param provider: ingest provider dict; its id, ``source`` and
        ``content_expiry`` are read here
    :param feeding_service: feeding service, used to pick the collection,
        prepare the rendition href and handle canceled items
    :param rule_set: optional rule set handed over to ``apply_rule_set``
    :param routing_scheme: optional routing scheme, applied only when the item
        is new or ``is_new_version`` reports a new version
    :return: ``(True, items_ids)`` on success, where ``items_ids`` holds the ids
        collected from ingested associations followed by the id(s) of the item
        itself; ``(False, [])`` when any step raised (the error is logged and
        handed to ``ProviderError.ingestItemError``)
    """
    items_ids = []
    try:
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # first time this guid is seen - it gets an id and starts its own family
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        item.setdefault('uri', item[GUID_FIELD])  # keep it as original guid

        if item.get('profile'):
            # a value which is not a valid object id is kept unchanged
            try:
                item['profile'] = bson.ObjectId(item['profile'])
            except bson.errors.InvalidId:
                pass

        set_default_state(item, CONTENT_STATE.INGESTED)
        expiry_minutes = provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES']
        item['expiry'] = get_expiry_date(expiry_minutes, item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        renditions = item.get('renditions', {})
        if renditions:
            # prefer the base image, otherwise take whatever rendition comes first
            base_rendition = renditions.get('baseImage') or next(iter(renditions.values()))
            # if there is media should be processed already
            if base_rendition and not base_rendition.get('media'):
                # NOTE(review): mimetype is looked up on the renditions mapping,
                # not on the picked rendition - confirm this is intended
                href = feeding_service.prepare_href(base_rendition['href'], renditions.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            # name is only used for logging
            assoc_name = assoc.get('headline') or assoc.get('slugline') or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    # logged only when the association really was found
                    logger.info('assoc ingested before %s', assoc_name)
                    assoc['_id'] = ingested['_id']
                    if is_new_version(assoc, ingested) and assoc.get('renditions'):  # new version
                        logger.info('new assoc version - re-transfer renditions for %s', assoc_name)
                        transfer_renditions(assoc['renditions'])
                    else:
                        logger.info('same/old version - use already fetched renditions for %s', assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get('renditions') and has_system_renditions(assoc):  # all set, just download
                        logger.info('new association with system renditions - transfer %s', assoc_name)
                        transfer_renditions(assoc['renditions'])
                    # recursive call without routing scheme
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        # NOTE(review): relies on ids[0] being the id of the association itself
                        # when it has no nested associations - confirm
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None, _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get('residRef'):
                # no guid - replace the association with the resolved reference
                item['associations'][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # stored values first, then the fresh ones on top of them
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                # re-raised so the outer handler reports the failure
                raise

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids