def api_collection_sources_feed_status_csv(collection_id, source_type):
    user_mc = user_mediacloud_client()
    collection = user_mc.tag(collection_id)
    list_type = str(source_type).lower()
    media_in_collection = media_with_tag(user_mediacloud_key(), collection_id)
    media_info_in_collection = _media_list_edit_job.map(media_in_collection)
    if list_type == 'review':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] > 0 and m['num_stories_90'] == 0
                          and m['num_stories_last_year'] > 0]
    elif list_type == 'remove':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] > 0 and m['num_stories_90'] == 0
                          and m['num_stories_last_year'] == 0
                          and m['latest_scrape_job.state'] == 'failed']
    elif list_type == 'unscrapeable':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] == 0 and m['num_stories_90'] > 0]
    elif list_type == 'working':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] > 0 and m['num_stories_last_year'] > 0]
    else:
        filtered_media = media_info_in_collection
    file_prefix = "Collection {} ({}) - sources feed {}".format(collection_id, collection['tag'], source_type)
    properties_to_include = SOURCE_FEED_LIST_CSV_PROPS
    return csv.download_media_csv(filtered_media, file_prefix, properties_to_include)
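# For reference, the four feed-status branches above can be read as a table of predicates over
# the same media keys ('active_feed_count', 'num_stories_90', 'num_stories_last_year',
# 'latest_scrape_job.state'). This is a minimal sketch, not part of the codebase: the
# FEED_STATUS_FILTERS name and _matches_feed_status helper are hypothetical.
FEED_STATUS_FILTERS = {
    'review': lambda m: m['active_feed_count'] > 0 and m['num_stories_90'] == 0
                        and m['num_stories_last_year'] > 0,
    'remove': lambda m: m['active_feed_count'] > 0 and m['num_stories_90'] == 0
                        and m['num_stories_last_year'] == 0
                        and m['latest_scrape_job.state'] == 'failed',
    'unscrapeable': lambda m: m['active_feed_count'] == 0 and m['num_stories_90'] > 0,
    'working': lambda m: m['active_feed_count'] > 0 and m['num_stories_last_year'] > 0,
}


def _matches_feed_status(media_item, list_type):
    # unknown list types match everything, mirroring the else branch above
    predicate = FEED_STATUS_FILTERS.get(list_type)
    return predicate(media_item) if predicate else True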
def api_collection_sources_csv(collection_id):
    user_mc = user_mediacloud_client()
    collection = user_mc.tag(collection_id)  # not cached because props can change often
    all_media = media_with_tag(user_mediacloud_key(), collection_id)
    file_prefix = "Collection {} ({}) - sources ".format(collection_id, collection['tag'])
    properties_to_include = SOURCE_LIST_CSV_EDIT_PROPS
    return csv.download_media_csv(all_media, file_prefix, properties_to_include)
def _stream_media_list_csv(user_mc_key, filename, topics_id, **kwargs):
    # Helper method to stream a list of media back to the client as a csv. Any args you pass in
    # will simply be passed on to a call to topicMediaList.
    all_media = []
    more_media = True
    params = kwargs
    params['limit'] = 1000  # an arbitrary value to let us page through with big pages
    try:
        while more_media:
            page = apicache.topic_media_list(user_mediacloud_key(), topics_id, **params)
            media_list = page['media']
            all_media = all_media + media_list
            if 'next' in page['link_ids']:
                params['link_id'] = page['link_ids']['next']
                more_media = True
            else:
                more_media = False
        return csv.download_media_csv(all_media, filename, TOPIC_MEDIA_CSV_PROPS)
    except Exception as exception:
        return json.dumps({'error': str(exception)}, separators=(',', ':')), 400
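# The loop above is the Media Cloud link-id pagination pattern: each page's 'link_ids' dict
# carries a 'next' id that gets passed back as the link_id param until it disappears. A
# standalone sketch of just that pattern, assuming fetch_page is a hypothetical stand-in for
# apicache.topic_media_list:
def _page_through(fetch_page, **params):
    all_items = []
    while True:
        page = fetch_page(**params)
        all_items.extend(page['media'])
        if 'next' not in page['link_ids']:
            return all_items
        params['link_id'] = page['link_ids']['next']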
def api_collection_sources_csv(collection_id):
    user_mc = user_admin_mediacloud_client()
    # info = user_mc.tag(int(collection_id))
    all_media = media_with_tag(user_mediacloud_key(), collection_id)
    for src in all_media:
        for tag in src['media_source_tags']:
            if is_metadata_tag_set(tag['tag_sets_id']):
                format_metadata_fields(src, tag['tag_sets_id'], tag['tag'])
    file_prefix = "Collection_Sourcelist_Template_for_" + collection_id + "_"
    what_type_download = COLLECTIONS_TEMPLATE_PROPS_EDIT
    return csv.download_media_csv(all_media, file_prefix, what_type_download)
def api_collection_sources_csv(collection_id):
    user_mc = user_mediacloud_client()
    collection = user_mc.tag(collection_id)  # not cached because props can change often
    all_media = media_with_tag(user_mediacloud_key(), collection_id)
    for src in all_media:
        for tag in src['media_source_tags']:
            if is_metadata_tag_set(tag['tag_sets_id']):
                format_metadata_fields(src, tag)
    file_prefix = "Collection {} ({}) - sources ".format(collection_id, collection['tag'])
    properties_to_include = COLLECTIONS_TEMPLATE_PROPS_EDIT
    return csv.download_media_csv(all_media, file_prefix, properties_to_include)
def api_collection_sources_feed_status_csv(collection_id, source_type):
    user_mc = user_mediacloud_client()
    collection = user_mc.tag(collection_id)
    list_type = str(source_type).lower()
    media_in_collection = media_with_tag(user_mediacloud_key(), collection_id)
    media_info_in_collection = _fetch_collection_source_feed_info(media_in_collection)
    if list_type == 'review':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] > 0 and m['num_stories_90'] == 0
                          and m['num_stories_last_year'] > 0]
    elif list_type == 'remove':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] > 0 and m['num_stories_90'] == 0
                          and m['num_stories_last_year'] == 0
                          and m['latest_scrape_job.state'] == 'failed']
    elif list_type == 'unscrapeable':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] == 0 and m['num_stories_90'] > 0]
    elif list_type == 'working':
        filtered_media = [m for m in media_info_in_collection
                          if m['active_feed_count'] > 0 and m['num_stories_last_year'] > 0]
    else:
        filtered_media = media_info_in_collection
    file_prefix = "Collection {} ({}) - sources feed {}".format(collection_id, collection['tag'], source_type)
    properties_to_include = SOURCE_FEED_LIST_CSV_PROPS
    return csv.download_media_csv(filtered_media, file_prefix, properties_to_include)
def _stream_media_list_csv(user_mc_key, filename, topics_id, **kwargs):
    # Helper method to stream a list of media back to the client as a csv. Any args you pass in
    # will simply be passed on to a call to topicMediaList.
    add_metadata = False  # off for now because this is SUPER slow
    all_media = []
    more_media = True
    params = kwargs
    params['limit'] = 1000  # an arbitrary value to let us page through with big pages
    try:
        cols_to_export = TOPICS_TEMPLATE_PROPS
        if not add_metadata:
            cols_to_export = cols_to_export[:-4]  # remove the metadata cols
        while more_media:
            page = apicache.topic_media_list(user_mediacloud_key(), topics_id, **params)
            media_list = page['media']
            user_mc = user_admin_mediacloud_client()
            if add_metadata:
                # one user_mc.media() call per source makes this an N+1 request pattern,
                # which is why add_metadata is off by default
                for media_item in media_list:
                    media_info = user_mc.media(media_item['media_id'])
                    for each_item in media_info['media_source_tags']:
                        if is_metadata_tag_set(each_item['tag_sets_id']):
                            format_metadata_fields(media_item, each_item)
            all_media = all_media + media_list
            if 'next' in page['link_ids']:
                params['link_id'] = page['link_ids']['next']
                more_media = True
            else:
                more_media = False
        return csv.download_media_csv(all_media, filename, cols_to_export)
    except Exception as exception:
        return json.dumps({'error': str(exception)}, separators=(',', ':')), 400
def download_sources_csv(all_media, file_prefix):
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        what_type_download = SOURCES_TEMPLATE_PROPS_EDIT
    else:
        what_type_download = SOURCES_TEMPLATE_PROPS_VIEW  # no editor_notes
    return download_media_csv(all_media, file_prefix, what_type_download)
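# Hypothetical usage sketch (the endpoint name and arguments are illustrative, not from the
# codebase): callers pass a media list and filename prefix, and download_sources_csv picks the
# edit or view column set based on the current user's role.
def api_sources_csv_example(collection_id):
    all_media = media_with_tag(user_mediacloud_key(), collection_id)
    file_prefix = "Collection {} - sources ".format(collection_id)
    return download_sources_csv(all_media, file_prefix)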