def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""
    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc:
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE and not doc.get('ingest_provider'):
            # set the source for the article
            set_default_source(doc)

        copy_metadata_from_user_preferences(doc, repo_type)

        if 'profile' not in doc and app.config.get('DEFAULT_CONTENT_TYPE', None):
            doc['profile'] = app.config.get('DEFAULT_CONTENT_TYPE', None)

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

def _duplicate_planning(self, original):
    new_plan = deepcopy(original)
    if new_plan.get('expired') and new_plan.get('event_item'):
        # If the Planning item has expired and is associated with an Event
        # then we remove the link to the associated Event as the Event would
        # have been expired also
        del new_plan['event_item']

    for f in ('_id', 'guid', 'lock_user', 'lock_time', 'original_creator', '_planning_schedule',
              'lock_session', 'lock_action', '_created', '_updated', '_etag', 'pubstatus',
              'expired', 'featured'):
        new_plan.pop(f, None)

    new_plan[ITEM_STATE] = WORKFLOW_STATE.DRAFT
    new_plan['guid'] = generate_guid(type=GUID_NEWSML)

    planning_datetime = new_plan.get('planning_date')
    current_date = get_local_end_of_day().date()
    if planning_datetime.date() < current_date:
        new_plan['planning_date'] = planning_datetime.replace(
            day=current_date.day, month=current_date.month, year=current_date.year)

    for cov in new_plan.get('coverages') or []:
        cov.pop('assigned_to', None)
        cov.get('planning', {})['scheduled'] = new_plan.get('planning_date')
        cov['coverage_id'] = generate_guid(type=GUID_NEWSML)
        cov['workflow_status'] = WORKFLOW_STATE.DRAFT
        cov['news_coverage_status'] = {'qcode': 'ncostat:int'}

    return new_plan

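
# The planning_date rollover in _duplicate_planning() above only swaps the
# calendar date and keeps the original time of day. A worked example with
# invented values (a minimal sketch, not part of the planning module):
from datetime import datetime

planning_datetime = datetime(2018, 1, 5, 14, 30)  # scheduled in the past
current_date = datetime(2018, 6, 20).date()       # stand-in for get_local_end_of_day().date()
if planning_datetime.date() < current_date:
    planning_datetime = planning_datetime.replace(
        day=current_date.day, month=current_date.month, year=current_date.year)
assert planning_datetime == datetime(2018, 6, 20, 14, 30)  # time of day preserved
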
def generate_recurring_events(event):
    generated_events = []
    setRecurringMode(event)

    # Get the recurrence_id, or generate one if it doesn't exist
    recurrence_id = event.get('recurrence_id', generate_guid(type=GUID_NEWSML))

    # compute the difference between start and end in the original event
    time_delta = event['dates']['end'] - event['dates']['start']

    # for all the dates based on the recurring rules:
    for date in itertools.islice(generate_recurring_dates(
            start=event['dates']['start'],
            tz=event['dates'].get('tz') and pytz.timezone(event['dates']['tz'] or None),
            **event['dates']['recurring_rule']
    ), 0, 200):  # set a limit to prevent too many events from being created
        # create event with the new dates
        new_event = copy.deepcopy(event)
        new_event['dates']['start'] = date
        new_event['dates']['end'] = date + time_delta
        # set a unique guid
        new_event['guid'] = generate_guid(type=GUID_NEWSML)
        new_event['_id'] = new_event['guid']
        # set the recurrence id
        new_event['recurrence_id'] = recurrence_id
        # set expiry date
        overwrite_event_expiry_date(new_event)

        generated_events.append(new_event)

    return generated_events

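
# A self-contained sketch of the date expansion that generate_recurring_dates()
# is assumed to perform above: expand a start datetime by a recurrence rule,
# then offset each occurrence by the original event duration. The rule
# parameters used here (daily, count=3) are illustrative assumptions, not the
# planning module's actual recurring_rule schema.
from datetime import datetime, timedelta

from dateutil import rrule

start = datetime(2018, 6, 1, 9, 0)
time_delta = timedelta(hours=2)  # original end - start
for occurrence in rrule.rrule(rrule.DAILY, dtstart=start, count=3):
    # each generated event keeps the original duration
    print(occurrence, '->', occurrence + time_delta)
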
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""
    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if "unique_id" not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if "family_id" not in doc:
            doc["family_id"] = doc[GUID_FIELD]

        if "event_id" not in doc:
            doc["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        set_dateline(doc, repo_type)
        set_byline(doc, repo_type)
        set_sign_off(doc, repo_type=repo_type)

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""
    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc:
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        copy_metadata_from_user_preferences(doc, repo_type)

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

def on_duplicate_item(doc, original_doc, operation=None):
    """Make sure duplicated item has basic fields populated."""
    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc['event_id'] = generate_guid(type=GUID_TAG)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
    doc['force_unlock'] = True
    doc[ITEM_OPERATION] = operation or ITEM_DUPLICATE
    doc['original_id'] = original_doc.get('item_id', original_doc.get('_id'))
    set_default_source(doc)

def on_duplicate_item(doc, original_doc):
    """Make sure duplicated item has basic fields populated."""
    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc['event_id'] = generate_guid(type=GUID_TAG)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
    doc['force_unlock'] = True
    doc[ITEM_OPERATION] = ITEM_DUPLICATE
    doc['original_id'] = original_doc.get('item_id', original_doc.get('_id'))
    set_default_source(doc)

def on_duplicate_item(doc, original_doc, operation=None):
    """Make sure duplicated item has basic fields populated."""
    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc["event_id"] = generate_guid(type=GUID_TAG)
    doc.setdefault("_id", doc[GUID_FIELD])
    set_sign_off(doc)
    doc["force_unlock"] = True
    doc[ITEM_OPERATION] = operation or ITEM_DUPLICATE
    doc["original_id"] = original_doc.get("item_id", original_doc.get("_id"))
    set_default_source(doc)

def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""
    for doc in docs:
        editor_utils.generate_fields(doc)
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if "unique_id" not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if "family_id" not in doc:
            doc["family_id"] = doc[GUID_FIELD]

        if "event_id" not in doc and repo_type != "ingest":
            doc["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(doc)

        if "profile" not in doc and app.config.get("DEFAULT_CONTENT_TYPE", None):
            doc["profile"] = app.config.get("DEFAULT_CONTENT_TYPE", None)

        copy_metadata_from_profile(doc)
        copy_metadata_from_user_preferences(doc, repo_type)

        if "language" not in doc:
            doc["language"] = app.config.get("DEFAULT_LANGUAGE", "en")
            if doc.get("task", None) and doc["task"].get("desk", None):
                desk = superdesk.get_resource_service("desks").find_one(
                    req=None, _id=doc["task"]["desk"])
                if desk and desk.get("desk_language", None):
                    doc["language"] = desk["desk_language"]

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

        if doc.get("template"):
            # avoid circular import
            from apps.templates.content_templates import render_content_template_by_id

            doc.pop("fields_meta", None)
            render_content_template_by_id(doc, doc["template"], update=True)
            editor_utils.generate_fields(doc)

def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""
    for doc in docs:
        editor_utils.generate_fields(doc)
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc and repo_type != 'ingest':
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(doc)

        if 'profile' not in doc and app.config.get('DEFAULT_CONTENT_TYPE', None):
            doc['profile'] = app.config.get('DEFAULT_CONTENT_TYPE', None)

        copy_metadata_from_profile(doc)
        copy_metadata_from_user_preferences(doc, repo_type)

        if 'language' not in doc:
            doc['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')
            if doc.get('task', None) and doc['task'].get('desk', None):
                desk = superdesk.get_resource_service('desks').find_one(
                    req=None, _id=doc['task']['desk'])
                if desk and desk.get('desk_language', None):
                    doc['language'] = desk['desk_language']

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

        if doc.get('template'):
            # avoid circular import
            from apps.templates.content_templates import render_content_template_by_id

            doc.pop('fields_meta', None)
            render_content_template_by_id(doc, doc['template'], update=True)
            editor_utils.generate_fields(doc)

def create(self, docs, **kwargs):
    new_guids = []
    provider = get_resource_service("ingest_providers").find_one(source="aapmm", req=None)
    if provider and "config" in provider and "username" in provider["config"]:
        self.backend.set_credentials(provider["config"]["username"], provider["config"]["password"])

    for doc in docs:
        if not doc.get("desk"):
            # if no desk is selected then it is a bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            archived_doc = self.backend.find_one_raw(doc["guid"], doc["guid"])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        dest_doc = dict(archived_doc)
        new_id = generate_guid(type=GUID_TAG)
        new_guids.append(new_id)
        dest_doc["_id"] = new_id
        generate_unique_id_and_name(dest_doc)

        if provider:
            dest_doc["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])

        dest_doc[config.VERSION] = 1
        send_to(doc=dest_doc, update=None, desk_id=doc.get("desk"), stage_id=doc.get("stage"))
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = archived_doc["_id"]
        dest_doc[FAMILY_ID] = archived_doc["_id"]
        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)

        superdesk.get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(dest_doc.get("_id"))

    return new_guids

def _create_package(self, text_item, image_items):
    """Create a new content package from given content items."""
    package = {
        ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
        'guid': generate_guid(type=GUID_TAG, id=text_item.get('guid') + '-package'),
        'versioncreated': text_item['versioncreated'],
        'firstcreated': text_item.get('firstcreated'),
        'headline': text_item.get('headline', ''),
        'groups': [
            {
                'id': 'root',
                'role': 'grpRole:NEP',
                'refs': [{'idRef': 'main'}],
            },
            {
                'id': 'main',
                'role': 'main',
                'refs': [],
            }
        ]
    }

    item_references = package['groups'][1]['refs']
    item_references.append({'residRef': text_item['guid']})

    for image in image_items:
        item_references.append({'residRef': image['guid']})

    return package

def create(self, docs, **kwargs):
    new_guids = []
    provider = self.get_provider()

    for doc in docs:
        if not doc.get('desk'):
            # if no desk is selected then it is a bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            archived_doc = self.fetch(doc['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        dest_doc = dict(archived_doc)
        new_id = generate_guid(type=GUID_TAG)
        new_guids.append(new_id)
        dest_doc['_id'] = new_id
        generate_unique_id_and_name(dest_doc)

        if provider:
            dest_doc['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])

        dest_doc[config.VERSION] = 1
        send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = archived_doc['_id']
        dest_doc[FAMILY_ID] = archived_doc['_id']
        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)

        superdesk.get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(dest_doc.get('_id'))

    return new_guids

def _create_image_items(self, image_links, text_item):
    image_items = []

    for image_url in image_links:
        guid_hash = hashlib.sha1(image_url.encode('utf8')).hexdigest()
        img_item = {
            'guid': generate_guid(type=GUID_TAG, id=text_item.get('guid') + guid_hash + '-image'),
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'versioncreated': text_item.get('versioncreated'),
            'firstcreated': text_item.get('firstcreated'),
            'headline': text_item.get('headline', ''),
            'renditions': {
                'baseImage': {
                    'href': image_url
                }
            }
        }
        image_items.append(img_item)

    return image_items

def on_create(self, docs):
    """Set default metadata."""
    for doc in docs:
        if not doc.get('guid'):
            doc['guid'] = generate_guid(type=GUID_NEWSML)
        set_original_creator(doc)

def parse_item(self, image_path):
    filename = os.path.basename(image_path)
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    item = {
        'guid': guid,
        'uri': guid,
        config.VERSION: 1,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': content_type,
        'versioncreated': utcnow(),
    }
    with open(image_path, 'rb') as f:
        _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
        f.seek(0)
        file_id = app.media.put(f, filename=filename, content_type=content_type, metadata=file_metadata)
        filemeta.set_filemeta(item, file_metadata)
        f.seek(0)
        metadata = get_meta_iptc(f)
        f.seek(0)
        self.parse_meta(item, metadata)
        rendition_spec = get_renditions_spec(no_custom_crops=True)
        renditions = generate_renditions(f, file_id, [file_id], 'image',
                                         content_type, rendition_spec, url_for_media)
        item['renditions'] = renditions
    return item

def update(self, id, updates, original):
    original_state = original[ITEM_STATE]
    if not is_workflow_state_transition_valid(ITEM_SPIKE, original_state):
        raise InvalidStateTransitionError()

    user = get_user(required=True)
    item = get_resource_service(ARCHIVE).find_one(req=None, _id=id)
    task = item.get('task', {})

    updates[EXPIRY] = self._get_spike_expiry(desk_id=task.get('desk'), stage_id=task.get('stage'))
    updates[REVERT_STATE] = item.get(ITEM_STATE, None)

    if original.get('rewrite_of'):
        updates['rewrite_of'] = None

    if original.get('rewritten_by'):
        updates['rewritten_by'] = None

    if original.get('broadcast'):
        updates['broadcast'] = None

    if original.get('rewrite_sequence'):
        updates['rewrite_sequence'] = None

    # remove any relation with linked items
    updates[ITEM_EVENT_ID] = generate_guid(type=GUID_TAG)

    # remove lock
    updates.update({
        'lock_user': None,
        'lock_session': None,
    })

    if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
        # remove links from items in the package
        package_service = PackageService()
        items = package_service.get_item_refs(original)
        for item in items:
            package_item = get_resource_service(ARCHIVE).find_one(req=None, _id=item[GUID_FIELD])
            if package_item:
                linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES, [])
                                      if linked.get(PACKAGE) != original.get(config.ID_FIELD)]
                super().system_update(package_item[config.ID_FIELD],
                                      {LINKED_IN_PACKAGES: linked_in_packages},
                                      package_item)

        # keep the structure of old group in order to be able to unspike the package
        updates[DELETED_GROUPS] = original[GROUPS]

        # and remove all the items from the package
        updates['groups'] = []

    item = self.backend.update(self.datasource, id, updates, original)
    push_notification('item:spike', item=str(id), user=str(user.get(config.ID_FIELD)))

    history_updates = dict(updates)
    if original.get('task'):
        history_updates['task'] = original.get('task')

    app.on_archive_item_updated(history_updates, original, ITEM_SPIKE)
    self._removed_refs_from_package(id)
    return item

def on_create(self, docs, **kwargs):
    user_id = get_user(required=True)['_id']
    session_id = get_auth()['_id']

    existing_locks = list(self.find(where={}))
    for existing_lock in existing_locks:
        if str(existing_lock.get(LOCK_USER)) != str(user_id):
            raise SuperdeskApiError.forbiddenError(
                message="Featured stories already being managed by another user.")
        elif str(existing_lock.get(LOCK_SESSION)) != str(session_id):
            raise SuperdeskApiError.forbiddenError(
                message="Featured stories already being managed by you in another session.")

    # get the lock; if that fails, raise a forbidden error
    if not lock(LOCK_ID, expire=5):
        raise SuperdeskApiError.forbiddenError(
            message="Unable to obtain lock on Featured stories.")

    for doc in docs:
        doc['_id'] = generate_guid(type=GUID_NEWSML)
        lock_updates = {
            LOCK_USER: user_id,
            LOCK_SESSION: session_id,
            LOCK_TIME: utcnow()
        }
        doc.update(lock_updates)

    return docs

def on_create(self, docs):
    # events generated by recurring rules
    generated_events = []
    # iterate over a copy, since events with recurring rules are removed from docs below
    for event in list(docs):
        # generate a unique id
        if 'guid' not in event:
            event['guid'] = generate_guid(type=GUID_NEWSML)
        event['_id'] = event['guid']
        # set the author
        set_original_creator(event)
        # overwrite expiry date
        overwrite_event_expiry_date(event)
        # We ignore the 'update_method' on create
        if 'update_method' in event:
            del event['update_method']
        # generate events based on recurring rules
        if event['dates'].get('recurring_rule', None):
            generated_events.extend(generate_recurring_events(event))
            # remove the event that contains the recurring rule; we don't need it anymore
            docs.remove(event)

    if generated_events:
        docs.extend(generated_events)

def setUp(self):
    super().setUp()
    dirname = os.path.dirname(os.path.realpath(__file__))
    image_path = os.path.normpath(os.path.join(dirname, "fixtures", self.filename))
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    self.item = {
        "guid": guid,
        "version": 1,
        "_id": guid,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        "mimetype": content_type,
        "versioncreated": datetime.now(),
    }
    with open(image_path, "rb") as f:
        _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
        f.seek(0)
        file_id = app.media.put(f, filename=self.filename, content_type=content_type,
                                metadata=file_metadata)
        filemeta.set_filemeta(self.item, file_metadata)
        f.seek(0)
        rendition_spec = get_renditions_spec()
        renditions = generate_renditions(f, file_id, [file_id], "image",
                                         content_type, rendition_spec, url_for_media)
        self.item["renditions"] = renditions

    archive = get_resource_service("archive")
    archive.post([self.item])

def _create_image_items(self, image_links, text_item):
    """Create a list of picture items that represent the external images
    located on given URLs.

    Each created item's `firstcreated` and `versioncreated` fields are set
    to the same value as the values of these fields in `text_item`.

    :param iterable image_links: list of image URLs
    :param dict text_item: the "main" text item the images are related to

    :return: list of created image items (as dicts)
    """
    image_items = []

    for image_url in image_links:
        img_item = {
            'guid': generate_guid(type=GUID_TAG),
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'firstcreated': text_item.get('firstcreated'),
            'versioncreated': text_item.get('versioncreated'),
            'renditions': {
                'baseImage': {
                    'href': image_url
                }
            }
        }
        image_items.append(img_item)

    return image_items

def delete(self, lookup):
    target_id = request.view_args["target_id"]
    archive_service = get_resource_service(ARCHIVE)
    target = archive_service.find_one(req=None, _id=target_id)

    updates = {}

    if target.get("rewrite_of"):
        # remove the rewrite info
        ArchiveSpikeService().update_rewrite(target)

    if not target.get("rewrite_of"):
        # there is nothing to do
        raise SuperdeskApiError.badRequestError(_("Only updates can be unlinked!"))

    if target.get("rewrite_of"):
        updates["rewrite_of"] = None

    if target.get("anpa_take_key"):
        updates["anpa_take_key"] = None

    if target.get("rewrite_sequence"):
        updates["rewrite_sequence"] = None

    if target.get("sequence"):
        updates["sequence"] = None

    updates["event_id"] = generate_guid(type=GUID_TAG)

    archive_service.system_update(target_id, updates, target)
    user = get_user(required=True)
    push_notification("item:unlink", item=target_id, user=str(user.get(config.ID_FIELD)))
    app.on_archive_item_updated(updates, target, ITEM_UNLINK)

def on_duplicate_item(doc):
    """Make sure duplicated item has basic fields populated."""
    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)

def setUp(self):
    super().setUp()
    dirname = os.path.dirname(os.path.realpath(__file__))
    image_path = os.path.normpath(os.path.join(dirname, 'fixtures', self.filename))
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    self.item = {
        'guid': guid,
        'version': 1,
        '_id': guid,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': content_type,
        'versioncreated': datetime.now()
    }
    with open(image_path, 'rb') as f:
        _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
        f.seek(0)
        file_id = app.media.put(f, filename=self.filename, content_type=content_type,
                                metadata=file_metadata)
        filemeta.set_filemeta(self.item, file_metadata)
        f.seek(0)
        rendition_spec = get_renditions_spec()
        renditions = generate_renditions(f, file_id, [file_id], 'image',
                                         content_type, rendition_spec, url_for_media)
        self.item['renditions'] = renditions

    archive = get_resource_service('archive')
    archive.post([self.item])

def test_trans_attributes(self):
    self.assertEqual(self.items[0].get(ITEM_TYPE), CONTENT_TYPE.TEXT)
    self.assertEqual(self.items[0].get('subject')[0].get('qcode'), '11016007')
    guid_hash = hashlib.sha1('https://www.example.com//12345'.encode('utf8')).hexdigest()
    guid = generate_guid(type=GUID_TAG, id=guid_hash)
    self.assertEqual(self.items[0].get('guid'), guid)

def test_trans_attributes(self):
    self.assertEqual(self.items[0].get(ITEM_TYPE), CONTENT_TYPE.TEXT)
    self.assertEqual(self.items[0].get("subject")[0].get("qcode"), "11016007")
    guid_hash = hashlib.sha1("https://www.example.com//12345".encode("utf8")).hexdigest()
    guid = generate_guid(type=GUID_TAG, id=guid_hash)
    self.assertEqual(self.items[0].get("guid"), guid)

def _create_image_items(self, image_links, text_item):
    """Create a list of picture items that represent the external images
    located on given URLs.

    Each created item's `firstcreated` and `versioncreated` fields are set
    to the same value as the values of these fields in `text_item`.

    :param iterable image_links: list of image URLs
    :param dict text_item: the "main" text item the images are related to

    :return: list of created image items (as dicts)
    """
    image_items = []

    for image_url in image_links:
        img_item = {
            "guid": generate_guid(type=GUID_TAG),
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            "firstcreated": text_item.get("firstcreated"),
            "versioncreated": text_item.get("versioncreated"),
            "renditions": {"baseImage": {"href": image_url}},
        }
        image_items.append(img_item)

    return image_items

def _create_package(self, text_item, image_items):
    """Create a new content package from given content items.

    The package's `main` group contains only the references to given items,
    not the items themselves. In the list of references, the reference to
    the text item precedes the references to image items.

    Package's `firstcreated` and `versioncreated` fields are set to values
    of these fields in `text_item`, and the `headline` is copied as well.

    :param dict text_item: item representing the text content
    :param list image_items: list of items (dicts) representing the images
        related to the text content
    :return: the created content package
    :rtype: dict
    """
    package = {
        ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
        "guid": generate_guid(type=GUID_TAG),
        "firstcreated": text_item["firstcreated"],
        "versioncreated": text_item["versioncreated"],
        "headline": text_item.get("headline", ""),
        "groups": [
            {"id": "root", "role": "grpRole:NEP", "refs": [{"idRef": "main"}]},
            {"id": "main", "role": "main", "refs": []},
        ],
    }

    item_references = package["groups"][1]["refs"]
    item_references.append({"residRef": text_item["guid"]})

    for image in image_items:
        item_references.append({"residRef": image["guid"]})

    return package

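
# For reference, a package built by _create_package() from one text item and
# one image item has this shape (field values shortened for readability):
#
#     {
#         ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
#         "guid": "tag:...",
#         "firstcreated": "...",
#         "versioncreated": "...",
#         "headline": "...",
#         "groups": [
#             {"id": "root", "role": "grpRole:NEP", "refs": [{"idRef": "main"}]},
#             {"id": "main", "role": "main", "refs": [
#                 {"residRef": "<text item guid>"},
#                 {"residRef": "<image item guid>"},
#             ]},
#         ],
#     }
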
def on_create(self, docs):
    """Set default metadata."""
    for doc in docs:
        doc['guid'] = generate_guid(type=GUID_NEWSML)
        doc['planning_type'] = 'agenda'
        set_original_creator(doc)
        self._validate_unique_agenda(doc, {})

def setUp(self):
    super().setUp()
    dirname = os.path.dirname(os.path.realpath(__file__))
    image_path = os.path.normpath(os.path.join(dirname, 'fixtures', self.filename))
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    self.item = {
        'guid': guid,
        'version': 1,
        '_id': guid,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': content_type,
        'versioncreated': datetime.now()
    }
    with open(image_path, 'rb') as f:
        _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
        f.seek(0)
        file_id = app.media.put(f, filename=self.filename, content_type=content_type,
                                metadata=file_metadata)
        filemeta.set_filemeta(self.item, file_metadata)
        f.seek(0)
        rendition_spec = get_renditions_spec()
        renditions = generate_renditions(f, file_id, [file_id], 'image',
                                         content_type, rendition_spec, url_for_media)
        self.item['renditions'] = renditions

    archive = get_resource_service('archive')
    archive.post([self.item])

def fetch_item(doc, desk_id, stage_id, state=None, target=None):
    dest_doc = dict(doc)

    if target:
        # set target subscriber info
        dest_doc.update(target)

    new_id = generate_guid(type=GUID_TAG)
    if doc.get("guid"):
        dest_doc.setdefault("uri", doc[GUID_FIELD])

    dest_doc[config.ID_FIELD] = new_id
    dest_doc[GUID_FIELD] = new_id
    generate_unique_id_and_name(dest_doc)

    # avoid circular import
    from apps.tasks import send_to

    dest_doc[config.VERSION] = 1
    dest_doc["versioncreated"] = utcnow()
    send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
    dest_doc[ITEM_STATE] = state or CONTENT_STATE.FETCHED

    dest_doc[FAMILY_ID] = doc[config.ID_FIELD]
    dest_doc[INGEST_ID] = doc[config.ID_FIELD]
    dest_doc[ITEM_OPERATION] = ITEM_FETCH

    remove_unwanted(dest_doc)
    set_original_creator(dest_doc)
    return dest_doc

def _create_image_items(self, image_links, text_item):
    image_items = []

    for image_url in image_links:
        guid_hash = hashlib.sha1(image_url.encode("utf8")).hexdigest()
        img_item = {
            "guid": generate_guid(type=GUID_TAG, id=text_item.get("guid") + guid_hash + "-image"),
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            "versioncreated": text_item.get("versioncreated"),
            "firstcreated": text_item.get("firstcreated"),
            "headline": text_item.get("headline", ""),
            "renditions": {
                "baseImage": {
                    "href": image_url
                }
            },
        }
        image_items.append(img_item)

    return image_items

def delete(self, lookup):
    target_id = request.view_args['target_id']
    archive_service = get_resource_service(ARCHIVE)
    target = archive_service.find_one(req=None, _id=target_id)

    updates = {}

    if target.get('rewrite_of'):
        # remove the rewrite info
        ArchiveSpikeService().update_rewrite(target)

    if not target.get('rewrite_of'):
        # there is nothing to do
        raise SuperdeskApiError.badRequestError("Only updates can be unlinked!")

    if target.get('rewrite_of'):
        updates['rewrite_of'] = None

    if target.get('anpa_take_key'):
        updates['anpa_take_key'] = None

    if target.get('rewrite_sequence'):
        updates['rewrite_sequence'] = None

    if target.get('sequence'):
        updates['sequence'] = None

    updates['event_id'] = generate_guid(type=GUID_TAG)

    archive_service.system_update(target_id, updates, target)
    user = get_user(required=True)
    push_notification('item:unlink', item=target_id, user=str(user.get(config.ID_FIELD)))
    app.on_archive_item_updated(updates, target, ITEM_UNLINK)

def update(self, id, updates, original):
    original_state = original[ITEM_STATE]
    if not is_workflow_state_transition_valid(ITEM_SPIKE, original_state):
        raise InvalidStateTransitionError()

    user = get_user(required=True)
    item = get_resource_service(ARCHIVE).find_one(req=None, _id=id)
    task = item.get('task', {})

    updates[EXPIRY] = self._get_spike_expiry(desk_id=task.get('desk'), stage_id=task.get('stage'))
    updates[REVERT_STATE] = item.get(ITEM_STATE, None)

    if original.get('rewrite_of'):
        updates['rewrite_of'] = None

    if original.get('rewritten_by'):
        updates['rewritten_by'] = None

    if original.get('broadcast'):
        updates['broadcast'] = None

    if original.get('rewrite_sequence'):
        updates['rewrite_sequence'] = None

    # remove any relation with linked items
    updates[ITEM_EVENT_ID] = generate_guid(type=GUID_TAG)

    if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
        # remove links from items in the package
        package_service = PackageService()
        items = package_service.get_item_refs(original)
        for item in items:
            package_item = get_resource_service(ARCHIVE).find_one(req=None, _id=item[GUID_FIELD])
            if package_item:
                linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES, [])
                                      if linked.get(PACKAGE) != original.get(config.ID_FIELD)]
                super().system_update(package_item[config.ID_FIELD],
                                      {LINKED_IN_PACKAGES: linked_in_packages},
                                      package_item)

        # and remove all the items from the package
        updates['groups'] = []

    item = self.backend.update(self.datasource, id, updates, original)
    push_notification('item:spike', item=str(id), user=str(user.get(config.ID_FIELD)))

    history_updates = dict(updates)
    if original.get('task'):
        history_updates['task'] = original.get('task')

    app.on_archive_item_updated(history_updates, original, ITEM_SPIKE)
    self._removed_refs_from_package(id)
    return item

def parse(self, file_path, provider=None):
    try:
        item = {
            ITEM_TYPE: CONTENT_TYPE.PREFORMATTED,
            "guid": generate_guid(type=GUID_TAG),
            "versioncreated": utcnow(),
        }

        with open(file_path, "rb") as f:
            lines = [line for line in f]

        # parse first header line
        m = re.match(b"\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)", lines[0], flags=re.I)
        if m:
            item["original_source"] = m.group(1).decode("latin-1", "replace")
            item["ingest_provider_sequence"] = m.group(2).decode()
            item["priority"] = self.map_priority(m.group(3).decode())
            item["anpa_category"] = [{"qcode": self.map_category(m.group(4).decode())}]
            item["word_count"] = int(m.group(5).decode())

        inHeader = True
        inText = False
        inNote = False
        for line in lines[1:]:
            # STX starts the body of the story
            if line[0:1] == b"\x02":
                # pick the rest of the line off as the headline
                item["headline"] = line[1:].decode("latin-1", "replace").rstrip("\r\n")
                item["body_html"] = ""
                inText = True
                inHeader = False
                continue
            # ETX denotes the end of the story
            if line[0:1] == b"\x03":
                break
            if inText:
                if (
                    line.decode("latin-1", "replace").find("The following information is not for publication") != -1
                    or line.decode("latin-1", "replace").find(
                        "The following information is not intended for publication"
                    ) != -1
                ):
                    inNote = True
                    inText = False
                    item["ednote"] = ""
                    continue
                item["body_html"] += line.decode("latin-1", "replace")
            if inNote:
                item["ednote"] += line.decode("latin-1", "replace")
                continue
            if inHeader:
                if "slugline" not in item:
                    item["slugline"] = ""
                item["slugline"] += line.decode("latin-1", "replace").rstrip("/\r\n")
                continue

        return item
    except Exception as ex:
        raise ParserError.IPTC7901ParserError(exception=ex, provider=provider)

def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href, old_item)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider,
                                                                                   routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            pass
        return False
    return True

def generate_recurring_events(event):
    generated_events = []
    setRecurringMode(event)

    # Get the recurrence_id, or generate one if it doesn't exist
    recurrence_id = event.get('recurrence_id', generate_guid(type=GUID_NEWSML))

    # compute the difference between start and end in the original event
    time_delta = event['dates']['end'] - event['dates']['start']

    # for all the dates based on the recurring rules:
    for date in itertools.islice(generate_recurring_dates(
            start=event['dates']['start'],
            tz=event['dates'].get('tz') and pytz.timezone(event['dates']['tz'] or None),
            **event['dates']['recurring_rule']
    ), 0, get_max_recurrent_events()):  # set a limit to prevent too many events from being created
        # create event with the new dates
        new_event = copy.deepcopy(event)

        # Remove fields not required by the new events
        for key in list(new_event.keys()):
            if key.startswith('_'):
                new_event.pop(key)
            elif key.startswith('lock_'):
                new_event.pop(key)

        new_event.pop('pubstatus', None)
        new_event.pop('reschedule_from', None)

        new_event['dates']['start'] = date
        new_event['dates']['end'] = date + time_delta
        # set a unique guid
        new_event['guid'] = generate_guid(type=GUID_NEWSML)
        new_event['_id'] = new_event['guid']
        # set the recurrence id
        new_event['recurrence_id'] = recurrence_id
        # set expiry date
        overwrite_event_expiry_date(new_event)
        # set the _planning_schedule
        set_planning_schedule(new_event)

        generated_events.append(new_event)

    return generated_events

def test_trans_attributes(self):
    self.assertEqual(self.items[0].get(ITEM_TYPE), CONTENT_TYPE.TEXT)
    self.assertEqual(self.items[0].get('subject')[0].get('qcode'), '11016007')
    guid_hash = hashlib.sha1('https://www.example.com//12345'.encode('utf8')).hexdigest()
    guid = generate_guid(type=GUID_TAG, id=guid_hash)
    self.assertEqual(self.items[0].get('guid'), guid)

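
# The tests above depend on GUIDs being deterministic for a given source URL.
# A minimal sketch of that idea, assuming a tag-URI style scheme; the exact
# format produced by the real generate_guid() helper may differ.
import hashlib


def make_tag_guid(url, domain='example.com', date='2018'):
    """Derive a stable, tag-style GUID from a source URL."""
    url_hash = hashlib.sha1(url.encode('utf8')).hexdigest()
    return 'tag:{}:{}:{}'.format(domain, date, url_hash)


# the same URL always yields the same GUID, so re-ingests can be de-duplicated
assert make_tag_guid('https://www.example.com//12345') == make_tag_guid('https://www.example.com//12345')
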
def parse_file(self, filename, provider):
    """Parse 7901 file by given filename.

    :param filename
    """
    try:
        item = {ITEM_TYPE: CONTENT_TYPE.PREFORMATTED}
        item['guid'] = generate_guid(type=GUID_TAG)
        item['versioncreated'] = utcnow()

        with open(filename, 'rb') as f:
            lines = [line for line in f]

        # parse first header line
        m = re.match(b'\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)', lines[0], flags=re.I)
        if m:
            item['original_source'] = m.group(1).decode('latin-1', 'replace')
            item['ingest_provider_sequence'] = m.group(2).decode()
            item['priority'] = self.map_priority(m.group(3).decode())
            item['anpa_category'] = [{'qcode': self.map_category(m.group(4).decode())}]
            item['word_count'] = int(m.group(5).decode())

        inHeader = True
        inText = False
        inNote = False
        for line in lines[1:]:
            # STX starts the body of the story
            if line[0:1] == b'\x02':
                # pick the rest of the line off as the headline
                item['headline'] = line[1:].decode('latin-1', 'replace').rstrip('\r\n')
                item['body_html'] = ''
                inText = True
                inHeader = False
                continue
            # ETX denotes the end of the story
            if line[0:1] == b'\x03':
                break
            if inText:
                if line.decode('latin-1', 'replace').find('The following information is not for publication') != -1 \
                        or line.decode('latin-1', 'replace').find(
                            'The following information is not intended for publication') != -1:
                    inNote = True
                    inText = False
                    item['ednote'] = ''
                    continue
                item['body_html'] += line.decode('latin-1', 'replace')
            if inNote:
                item['ednote'] += line.decode('latin-1', 'replace')
                continue
            if inHeader:
                if 'slugline' not in item:
                    item['slugline'] = ''
                item['slugline'] += line.decode('latin-1', 'replace').rstrip('/\r\n')
                continue

        return item
    except Exception as ex:
        raise ParserError.IPTC7901ParserError(exception=ex, provider=provider)

def on_create(self, docs):
    """Set default metadata."""
    for doc in docs:
        if 'guid' not in doc:
            doc['guid'] = generate_guid(type=GUID_NEWSML)
        doc[config.ID_FIELD] = doc['guid']
        set_original_creator(doc)
        self._set_planning_event_date(doc)

def on_duplicate_item(doc):
    """Make sure duplicated item has basic fields populated."""
    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
    doc['force_unlock'] = True
    doc[ITEM_OPERATION] = ITEM_DUPLICATE

def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""
    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc and repo_type != 'ingest':
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(doc)

        if 'profile' not in doc and app.config.get('DEFAULT_CONTENT_TYPE', None):
            doc['profile'] = app.config.get('DEFAULT_CONTENT_TYPE', None)

        copy_metadata_from_profile(doc)
        copy_metadata_from_user_preferences(doc, repo_type)

        if 'language' not in doc:
            doc['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')
            if doc.get('task', None) and doc['task'].get('desk', None):
                desk = superdesk.get_resource_service('desks').find_one(req=None, _id=doc['task']['desk'])
                if desk and desk.get('desk_language', None):
                    doc['language'] = desk['desk_language']

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

def create(self, docs, **kwargs):
    search_provider = get_resource_service('search_providers').find_one(
        search_provider=PROVIDER_NAME, req=None)

    if not search_provider or search_provider.get('is_closed', False):
        raise SuperdeskApiError.badRequestError(
            'No search provider found or the search provider is closed.')

    if 'config' in search_provider:
        self.backend.set_credentials(search_provider['config'])

    new_guids = []
    for doc in docs:
        if not doc.get('desk'):
            # if no desk is selected then it is a bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            archived_doc = self.backend.find_one_raw(doc['guid'], doc['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, search_provider)

        dest_doc = dict(archived_doc)
        new_id = generate_guid(type=GUID_TAG)
        new_guids.append(new_id)
        dest_doc[config.ID_FIELD] = new_id
        generate_unique_id_and_name(dest_doc)

        if search_provider:
            dest_doc['ingest_provider'] = str(search_provider[config.ID_FIELD])

        dest_doc[config.VERSION] = 1
        send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = archived_doc[config.ID_FIELD]
        dest_doc[FAMILY_ID] = archived_doc[config.ID_FIELD]
        dest_doc[ITEM_OPERATION] = ITEM_FETCH
        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)

        superdesk.get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(dest_doc[config.ID_FIELD])

    get_resource_service('search_providers').system_update(
        search_provider[config.ID_FIELD], {'last_item_update': utcnow()}, search_provider)

    return new_guids

def parse(self, file_path, provider=None):
    try:
        item = {ITEM_TYPE: CONTENT_TYPE.TEXT,
                'guid': generate_guid(type=GUID_TAG),
                'versioncreated': utcnow()}

        with open(file_path, 'rb') as f:
            lines = [line for line in f]

        # parse first header line
        m = re.match(b'\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)', lines[0], flags=re.I)
        if m:
            item['original_source'] = m.group(1).decode('latin-1', 'replace')
            item['ingest_provider_sequence'] = m.group(2).decode()
            item['priority'] = self.map_priority(m.group(3).decode())
            item['anpa_category'] = [{'qcode': self.map_category(m.group(4).decode())}]
            item['word_count'] = int(m.group(5).decode())

        inHeader = True
        inText = False
        inNote = False
        for line in lines[1:]:
            # STX starts the body of the story
            if line[0:1] == b'\x02':
                # pick the rest of the line off as the headline
                item['headline'] = line[1:].decode('latin-1', 'replace').rstrip('\r\n')
                item['body_html'] = ''
                inText = True
                inHeader = False
                continue
            # ETX denotes the end of the story
            if line[0:1] == b'\x03':
                break
            if inText:
                if line.decode('latin-1', 'replace') \
                        .find('The following information is not for publication') != -1 \
                        or line.decode('latin-1', 'replace').find(
                            'The following information is not intended for publication') != -1:
                    inNote = True
                    inText = False
                    item['ednote'] = ''
                    continue
                item['body_html'] += line.decode('latin-1', 'replace')
            if inNote:
                item['ednote'] += line.decode('latin-1', 'replace')
                continue
            if inHeader:
                if 'slugline' not in item:
                    item['slugline'] = ''
                item['slugline'] += line.decode('latin-1', 'replace').rstrip('/\r\n')
                continue

        return item
    except Exception as ex:
        raise ParserError.IPTC7901ParserError(exception=ex, provider=provider)

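
# A quick illustration of the IPTC 7901 header line matched by the regex in
# the parsers above: SOH (\x01), then source, sequence number, priority,
# category, word count and a trailing service string. The sample bytes below
# are invented for demonstration.
import re

sample = b"\x01ABC123 4 i 250 DPA SERVICE"
m = re.match(b"\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)", sample, flags=re.I)
assert m is not None
print(m.group(1))  # b'ABC' -> original_source
print(m.group(2))  # b'123' -> ingest_provider_sequence
print(m.group(3))  # b'4'   -> priority (before map_priority)
print(m.group(4))  # b'i'   -> category (before map_category)
print(m.group(5))  # b'250' -> word_count
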
def fetch(self, docs, id=None, **kwargs):
    id_of_fetched_items = []
    for doc in docs:
        id_of_item_to_be_fetched = doc.get(config.ID_FIELD) if id is None else id

        desk_id = doc.get('desk')
        stage_id = doc.get('stage')

        ingest_service = get_resource_service('ingest')
        ingest_doc = ingest_service.find_one(req=None, _id=id_of_item_to_be_fetched)

        if not ingest_doc:
            raise SuperdeskApiError.notFoundError(
                'Failed to find ingest item with _id: %s' % id_of_item_to_be_fetched)

        if not is_workflow_state_transition_valid('fetch_from_ingest', ingest_doc[ITEM_STATE]):
            raise InvalidStateTransitionError()

        if doc.get('macro'):
            # there is a macro so transform it
            ingest_doc = get_resource_service('macros').execute_macro(ingest_doc, doc.get('macro'))

        archived = utcnow()
        ingest_service.patch(id_of_item_to_be_fetched, {'archived': archived})

        dest_doc = dict(ingest_doc)
        new_id = generate_guid(type=GUID_TAG)
        id_of_fetched_items.append(new_id)
        dest_doc[config.ID_FIELD] = new_id
        dest_doc[GUID_FIELD] = new_id
        generate_unique_id_and_name(dest_doc)

        dest_doc[config.VERSION] = 1
        send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = dest_doc[FAMILY_ID] = ingest_doc[config.ID_FIELD]
        dest_doc[ITEM_OPERATION] = ITEM_FETCH
        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)
        self.__fetch_items_in_package(dest_doc, desk_id, stage_id,
                                      doc.get(ITEM_STATE, CONTENT_STATE.FETCHED))

        get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(doc=dest_doc)
        build_custom_hateoas(custom_hateoas, dest_doc)
        doc.update(dest_doc)

    if kwargs.get('notify', True):
        push_notification('item:fetch', fetched=1)

    return id_of_fetched_items

def _parse(self, xml):
    """Parse xml document and return a list of events.

    :param xml: xml document
    :type xml: lxml.etree._Element
    :return: a list of events
    """
    items = []
    documents = []

    # http events api xml
    if xml.tag == self.SUPPORTED_ROOT_TAGS.RESULT:
        documents = xml.xpath('./document')
    # ftp events xml
    elif xml.tag == self.SUPPORTED_ROOT_TAGS.DOCUMENT:
        documents = [xml]

    self._prefetch_contacts()
    self._prefetch_vocabularies_items()

    for document in documents:
        item = {
            ITEM_TYPE: CONTENT_TYPE.EVENT,
            FORMAT: FORMATS.PRESERVED,
            GUID_FIELD: generate_guid(type=GUID_NEWSML),
            'firstcreated': utcnow(),
            'versioncreated': utcnow()
        }
        self._fill_ntb_id(document, item)
        self._fill_name(document, item)
        self._fill_dates(document, item)
        if 'start' not in item['dates'] or 'end' not in item['dates']:
            # explicitly ignore items without start/end dates
            continue
        self._fill_definition_short(document, item)
        self._fill_priority(document, item)
        self._fill_category(document, item)
        self._fill_calendars(item)
        self._fill_anpa_category(document, item)
        self._fill_location(document, item)
        self._fill_subject(document, item)
        self._fill_slugline(item)
        self._fill_occur_status(item)
        self._fill_internal_note(document, item)
        self._fill_links(document, item)
        self._fill_event_contact_info(document, item)
        items.append(item)

    return items

def _duplicate_planning(self, original):
    new_plan = deepcopy(original)

    for f in ('_id', 'guid', 'lock_user', 'lock_time', 'original_creator', '_coverages',
              'lock_session', 'lock_action', '_created', '_updated', '_etag', 'pubstatus'):
        new_plan.pop(f, None)

    new_plan[ITEM_STATE] = WORKFLOW_STATE.IN_PROGRESS
    new_plan['guid'] = generate_guid(type=GUID_NEWSML)
    return new_plan

def parse_item(self, image_path):
    filename = os.path.basename(image_path)
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    item = {
        'guid': guid,
        config.VERSION: 1,
        config.ID_FIELD: guid,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': content_type,
        'versioncreated': datetime.now()
    }
    with open(image_path, 'rb') as f:
        _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
        f.seek(0)
        file_id = app.media.put(f, filename=filename, content_type=content_type, metadata=file_metadata)
        filemeta.set_filemeta(item, file_metadata)
        f.seek(0)
        metadata = get_meta_iptc(f)
        f.seek(0)
        rendition_spec = get_renditions_spec(no_custom_crops=True)
        renditions = generate_renditions(f, file_id, [file_id], 'image',
                                         content_type, rendition_spec, url_for_media)
        item['renditions'] = renditions

    try:
        date_created, time_created = metadata[TAG.DATE_CREATED], metadata[TAG.TIME_CREATED]
    except KeyError:
        pass
    else:
        # we format proper ISO 8601 date so we can parse it with dateutil
        datetime_created = '{}-{}-{}T{}:{}:{}{}{}:{}'.format(date_created[0:4],
                                                             date_created[4:6],
                                                             date_created[6:8],
                                                             time_created[0:2],
                                                             time_created[2:4],
                                                             time_created[4:6],
                                                             time_created[6],
                                                             time_created[7:9],
                                                             time_created[9:])
        item['firstcreated'] = dateutil.parser.parse(datetime_created)

    # now we map IPTC metadata to superdesk metadata
    for source_key, dest_key in IPTC_MAPPING.items():
        try:
            item[dest_key] = metadata[source_key]
        except KeyError:
            continue
    return item

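
# Worked example of the ISO 8601 reconstruction in parse_item() above: IPTC
# stores the creation date as e.g. '20180425' and the time as '103000+0200';
# interleaving the separators yields a string dateutil can parse. The sample
# values are invented.
import dateutil.parser

date_created, time_created = '20180425', '103000+0200'
datetime_created = '{}-{}-{}T{}:{}:{}{}{}:{}'.format(
    date_created[0:4], date_created[4:6], date_created[6:8],
    time_created[0:2], time_created[2:4], time_created[4:6],
    time_created[6], time_created[7:9], time_created[9:])
assert datetime_created == '2018-04-25T10:30:00+02:00'
print(dateutil.parser.parse(datetime_created))  # 2018-04-25 10:30:00+02:00
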
def _parse_association(self, association):
    """Parse a BBC ninjs association.

    :param association:
    :return: association dict
    """
    key = association.pop('id')
    # BBC don't use 'featuremedia', they typically use 'featureimage'
    if re.match('^feature', key):
        key = 'featuremedia'
    parsed = deepcopy(association)
    parsed[ITEM_TYPE] = CONTENT_TYPE.PICTURE
    url = association['renditions']['original']['href']
    guid_hash = hashlib.sha1(url.encode('utf8')).hexdigest()
    parsed['guid'] = generate_guid(type=GUID_TAG, id=guid_hash)
    return key, parsed

def create(self, docs, **kwargs):
    search_provider = get_resource_service('search_providers').find_one(
        search_provider=PROVIDER_NAME, req=None)

    if not search_provider or search_provider.get('is_closed', False):
        raise SuperdeskApiError.badRequestError(
            'No search provider found or the search provider is closed.')

    if 'config' in search_provider:
        self.backend.set_credentials(search_provider['config'])

    new_guids = []
    for doc in docs:
        if not doc.get('desk'):
            # if no desk is selected then it is a bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            archived_doc = self.backend.find_one_raw(doc['guid'], doc['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, search_provider)

        dest_doc = dict(archived_doc)
        new_id = generate_guid(type=GUID_TAG)
        new_guids.append(new_id)
        dest_doc[config.ID_FIELD] = new_id
        generate_unique_id_and_name(dest_doc)

        if search_provider:
            dest_doc['ingest_provider'] = str(search_provider[config.ID_FIELD])

        dest_doc[config.VERSION] = 1
        send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = archived_doc[config.ID_FIELD]
        dest_doc[FAMILY_ID] = archived_doc[config.ID_FIELD]
        dest_doc[ITEM_OPERATION] = ITEM_FETCH
        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)

        superdesk.get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(dest_doc[config.ID_FIELD])

    get_resource_service('search_providers').system_update(
        search_provider[config.ID_FIELD], {'last_item_update': utcnow()}, search_provider)

    return new_guids

def _create_package(self, text_item, image_items):
    """Create a new content package from given content items.

    The package's `main` group contains only the references to given items,
    not the items themselves. In the list of references, the reference to
    the text item precedes the references to image items.

    Package's `firstcreated` and `versioncreated` fields are set to values
    of these fields in `text_item`, and the `headline` is copied as well.

    :param dict text_item: item representing the text content
    :param list image_items: list of items (dicts) representing the images
        related to the text content
    :return: the created content package
    :rtype: dict
    """
    package = {
        ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
        'guid': generate_guid(type=GUID_TAG),
        'firstcreated': text_item['firstcreated'],
        'versioncreated': text_item['versioncreated'],
        'headline': text_item.get('headline', ''),
        'groups': [
            {
                'id': 'root',
                'role': 'grpRole:NEP',
                'refs': [{'idRef': 'main'}],
            },
            {
                'id': 'main',
                'role': 'main',
                'refs': [],
            }
        ]
    }

    item_references = package['groups'][1]['refs']
    item_references.append({'residRef': text_item['guid']})

    for image in image_items:
        item_references.append({'residRef': image['guid']})

    return package

def ingest_items_for(self, desk, no_of_stories, skip_index):
    desk_id = desk['_id']
    stage_id = desk['incoming_stage']

    bucket_size = min(100, no_of_stories)
    no_of_buckets = len(range(0, no_of_stories, bucket_size))

    for x in range(0, no_of_buckets):
        skip = x * bucket_size * skip_index
        logger.info('Page : {}, skip: {}'.format(x + 1, skip))

        cursor = get_resource_service('published').get_from_mongo(None, {})
        cursor.skip(skip)
        cursor.limit(bucket_size)
        items = list(cursor)
        logger.info('Inserting {} items'.format(len(items)))

        archive_items = []
        for item in items:
            dest_doc = dict(item)
            new_id = generate_guid(type=GUID_TAG)
            dest_doc[app.config['ID_FIELD']] = new_id
            dest_doc['guid'] = new_id
            generate_unique_id_and_name(dest_doc)
            dest_doc[app.config['VERSION']] = 1
            dest_doc[ITEM_STATE] = CONTENT_STATE.FETCHED

            user_id = desk.get('members', [{'user': None}])[0].get('user')
            dest_doc['original_creator'] = user_id
            dest_doc['version_creator'] = user_id

            from apps.tasks import send_to
            send_to(dest_doc, desk_id=desk_id, stage_id=stage_id, user_id=user_id)
            dest_doc[app.config['VERSION']] = 1  # above step increments the version and needs to be reset
            dest_doc[FAMILY_ID] = item['_id']

            remove_unwanted(dest_doc)
            archive_items.append(dest_doc)

        get_resource_service(ARCHIVE).post(archive_items)
        for item in archive_items:
            insert_into_versions(id_=item[app.config['ID_FIELD']])

def _parse_main(self, json):
    """Parse the main body of text and metadata.

    :param json:
    :return: dict of article metadata and body
    """
    # No GUID is included so generate one from the link
    main = {}
    guid_hash = hashlib.sha1(json['uri'].encode('utf8')).hexdigest()
    main['guid'] = generate_guid(type=GUID_TAG, id=guid_hash)

    # Copy over all attributes which are the same as Superdesk's ninjs variant
    for copy_property in self.direct_copy_properties:
        if json.get(copy_property) is not None:
            main[copy_property] = json[copy_property]

    main['versioncreated'] = self._parse_date(json['versioncreated'])
    main['firstcreated'] = self._parse_date(json['firstcreated'])
    if json.get('embargotime'):
        main['embargo'] = json['embargotime']
    main['type'] = self._convert_type(json['type'])
    return main

def _create_image(self, association, main):
    """Build an image item from an association.

    :param association: The raw association in BBC's ninjs variant
    :param main: The main article body
    :return: An image item dict
    """
    url = association['renditions']['original']['href']
    guid_hash = hashlib.sha1(url.encode('utf8')).hexdigest()
    item = {
        'guid': generate_guid(type=GUID_TAG, id=guid_hash + '-image'),
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'versioncreated': main['versioncreated'],
        'firstcreated': main['firstcreated'],
        'headline': association.get('headline', ''),
        'description_text': association.get('description_text', ''),
        'renditions': {
            'baseImage': {
                'href': url
            }
        }
    }
    return item

def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    try:
        ingest_service = superdesk.get_resource_service('ingest')

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider,
                                                                                   routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            pass
        return False
    return True

def fetch(self, docs, id=None, **kwargs):
    id_of_fetched_items = []
    for doc in docs:
        id_of_item_to_be_fetched = doc.get(config.ID_FIELD) if id is None else id

        desk_id = doc.get("desk")
        stage_id = doc.get("stage")

        ingest_service = get_resource_service("ingest")
        ingest_doc = ingest_service.find_one(req=None, _id=id_of_item_to_be_fetched)

        if not ingest_doc:
            raise SuperdeskApiError.notFoundError(
                "Failed to find ingest item with _id: %s" % id_of_item_to_be_fetched)

        if not is_workflow_state_transition_valid("fetch_from_ingest", ingest_doc[ITEM_STATE]):
            raise InvalidStateTransitionError()

        if doc.get("macro"):
            # there is a macro so transform it
            ingest_doc = get_resource_service("macros").execute_macro(ingest_doc, doc.get("macro"))

        archived = utcnow()
        ingest_service.patch(id_of_item_to_be_fetched, {"archived": archived})

        dest_doc = dict(ingest_doc)

        if doc.get("target"):
            dest_doc.update(doc.get("target"))

        new_id = generate_guid(type=GUID_TAG)
        id_of_fetched_items.append(new_id)
        dest_doc[config.ID_FIELD] = new_id
        dest_doc[GUID_FIELD] = new_id
        generate_unique_id_and_name(dest_doc)

        dest_doc[config.VERSION] = 1
        dest_doc["versioncreated"] = archived
        send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = dest_doc[FAMILY_ID] = ingest_doc[config.ID_FIELD]
        dest_doc[ITEM_OPERATION] = ITEM_FETCH
        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)
        self.__fetch_items_in_package(dest_doc, desk_id, stage_id,
                                      doc.get(ITEM_STATE, CONTENT_STATE.FETCHED))

        desk = get_resource_service("desks").find_one(req=None, _id=desk_id)
        if desk and desk.get("default_content_profile"):
            dest_doc["profile"] = desk["default_content_profile"]

        if dest_doc.get("type", "text") in MEDIA_TYPES:
            dest_doc["profile"] = None

        get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(doc=dest_doc)
        build_custom_hateoas(custom_hateoas, dest_doc)
        doc.update(dest_doc)

        if kwargs.get("notify", True):
            ingest_doc.update({"task": dest_doc.get("task")})
            push_item_move_notification(ingest_doc, doc, "item:fetch")

    return id_of_fetched_items
