Exemplo n.º 1
0
def normalized_and_story_split_count(q, open_q, start_date, end_date):
    """Build split story counts for ``q`` together with those for ``open_q``.

    Both queries are run with the same filter query, which is built from
    ``start_date`` and ``end_date`` formatted as ``YYYY-MM-DD``. Each list of
    counts is passed through ``add_missing_dates_to_split_story_counts`` for
    that same date range before the two are combined.

    Returns:
        A dict with three keys:
        ``'counts'`` - result of ``combined_split_and_normalized_counts``;
        ``'total'`` - sum of the ``'count'`` values for ``q``;
        ``'normalized_total'`` - sum of the ``'count'`` values for ``open_q``.
    """
    day_format = "%Y-%m-%d"
    fq = dates_as_filter_query(start_date.strftime(day_format),
                               end_date.strftime(day_format))

    def _split_counts_for(query):
        # Same two-step fetch is needed for both the matching and open query.
        response = base_apicache.story_count(query, fq, split=True)
        return add_missing_dates_to_split_story_counts(response['counts'],
                                                       start_date, end_date)

    matching_days = _split_counts_for(q)
    open_days = _split_counts_for(open_q)
    return {
        'counts': combined_split_and_normalized_counts(matching_days, open_days),
        'total': sum(day['count'] for day in matching_days),
        'normalized_total': sum(day['count'] for day in open_days),
    }
Exemplo n.º 2
0
def normalized_and_story_split_count(q, open_q, start_date, end_date):
    """Build split story counts for ``q`` together with those for ``open_q``.

    The filter query is built from ``start_date`` and ``end_date`` formatted
    as ``YYYY-MM-DD``. Both queries are fetched through
    ``cached_story_split_count`` using the key from ``base_cache.api_key()``,
    then passed through ``add_missing_dates_to_split_story_counts`` for the
    same date range.

    Returns:
        A dict with three keys:
        ``'counts'`` - result of ``combined_split_and_normalized_counts``;
        ``'total'`` - sum of the ``'count'`` values for ``q``;
        ``'normalized_total'`` - sum of the ``'count'`` values for ``open_q``.
    """
    day_format = "%Y-%m-%d"
    fq = dates_as_filter_query(start_date.strftime(day_format),
                               end_date.strftime(day_format))
    mc_api_key = base_cache.api_key()
    # Run the identical fetch pipeline for the matching query, then the open one.
    matching, total = [
        add_missing_dates_to_split_story_counts(
            cached_story_split_count(mc_api_key, query, fq)['counts'],
            start_date, end_date)
        for query in (q, open_q)
    ]
    return {
        'counts': combined_split_and_normalized_counts(matching, total),
        'total': sum(day['count'] for day in matching),
        'normalized_total': sum(day['count'] for day in total),
    }
def topic_split_story_counts(user_mc_key, topics_id, **kwargs):
    """
    Return split story counts for a topic, based on the request filters.

    Filters (snapshot, timespan, focus and query) are read from
    ``request.args``; any keyword arguments passed in override them. The
    query is always extended to exclude stories tagged with
    ``STORY_UNDATEABLE_TAG``. The returned ``'counts'`` entry is passed
    through ``add_missing_dates_to_split_story_counts`` using the timespan's
    start and end dates.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    timespan = topic_timespan(topics_id, snapshots_id, foci_id, timespans_id)
    query_args = dict(
        snapshots_id=snapshots_id,
        timespans_id=timespans_id,
        foci_id=foci_id,
        q=q,
        fq=timespan['fq'],
    )
    # explicitly passed args win over anything pulled from request.args
    query_args.update(kwargs)

    # always ignore undateable stories
    # NOTE: doesn't work if the query includes parens!!!
    exclude_undateable = "-(tags_id_stories:{})".format(STORY_UNDATEABLE_TAG)
    user_query = query_args['q']
    if user_query is None or len(user_query) == 0:
        query_args['q'] = "* AND {}".format(exclude_undateable)
    else:
        query_args['q'] = "(({}) AND {})".format(user_query,
                                                 exclude_undateable)

    results = _cached_topic_split_story_counts(user_mc_key, topics_id,
                                               **query_args)
    raw_counts = results['counts']
    date_format = mc.SENTENCE_PUBLISH_DATE_FORMAT
    range_start = datetime.strptime(timespan['start_date'], date_format)
    range_end = datetime.strptime(timespan['end_date'], date_format)
    results['counts'] = add_missing_dates_to_split_story_counts(
        raw_counts, range_start, range_end)
    return results
Exemplo n.º 4
0
def split_story_count(user_mc_key, q='*', last_n_days=None):
    """Return split story counts for ``q`` plus a ``'total_story_count'``.

    When ``last_n_days`` is given, the query is limited by a publish-date
    filter running from that many days before today up to ``YESTERDAY``.
    When it is ``None`` no filter is applied, and the date range is taken
    from the first and last returned counts, clamped to
    ``[MC_START_DATE, YESTERDAY]``.

    ``user_mc_key`` is not used inside this function body.
    """
    all_time = last_n_days is None
    if all_time:
        start_date = None
        end_date = None
        fq = None
    else:
        start_date = datetime.datetime.today() - datetime.timedelta(
            last_n_days)
        end_date = YESTERDAY
        fq = mc.publish_date_query(start_date, end_date, True, True)

    results = _cached_split_story_counts(q, fq)
    daily = results['counts']
    if all_time and len(daily) > 0:
        # when fetching ALL stories, make sure bad dates don't give us
        # super old / future ones
        date_format = mc.SENTENCE_PUBLISH_DATE_FORMAT
        first_day = datetime.datetime.strptime(daily[0]['date'], date_format)
        start_date = max(MC_START_DATE, first_day)
        last_day = datetime.datetime.strptime(daily[-1]['date'], date_format)
        end_date = min(YESTERDAY, last_day)

    results['counts'] = add_missing_dates_to_split_story_counts(
        daily, start_date, end_date)
    results['total_story_count'] = sum(
        entry['count'] for entry in results['counts'])
    return results
Exemplo n.º 5
0
def topic_split_story_counts(user_mc_key, topics_id, **kwargs):
    """
    Return split story counts for a topic, based on the request filters.

    Snapshot, timespan, focus and query filters come from ``request.args``,
    with any passed-in keyword arguments taking precedence. Stories tagged
    with ``STORY_UNDATEABLE_TAG`` are always excluded by the query. The
    ``'counts'`` entry of the result is passed through
    ``add_missing_dates_to_split_story_counts`` using the timespan's start
    and end dates.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    timespan = topic_timespan(topics_id, snapshots_id, foci_id, timespans_id)
    from_request = {
        'snapshots_id': snapshots_id,
        'timespans_id': timespans_id,
        'foci_id': foci_id,
        'q': q,
        'fq': timespan['fq'],
    }
    # passed in args override anything pulled from the request.args
    merged_args = {**from_request, **kwargs}

    # and make sure to ignore undateable stories
    # (doesn't work if the query includes parens!!!)
    undateable_query_part = "-(tags_id_stories:{})".format(STORY_UNDATEABLE_TAG)
    base_query = merged_args['q']
    has_query = (base_query is not None) and (len(base_query) > 0)
    merged_args['q'] = (
        "(({}) AND {})".format(base_query, undateable_query_part)
        if has_query
        else "* AND {}".format(undateable_query_part)
    )

    results = _cached_topic_split_story_counts(user_mc_key, topics_id, **merged_args)
    fetched_counts = results['counts']
    first_day = datetime.strptime(timespan['start_date'], mc.SENTENCE_PUBLISH_DATE_FORMAT)
    last_day = datetime.strptime(timespan['end_date'], mc.SENTENCE_PUBLISH_DATE_FORMAT)
    results['counts'] = add_missing_dates_to_split_story_counts(fetched_counts, first_day, last_day)
    return results
def _cached_last_year_split_story_count(user_mc_key, q='*'):
    """Fetch daily split story counts for ``q`` over the last 365 days.

    The range runs from 365 days before today through yesterday. The counts
    are passed through ``add_missing_dates_to_split_story_counts`` for that
    range, and their sum is stored under ``'total_story_count'``.

    ``user_mc_key`` is not read in this body.
    """
    user_mc = user_mediacloud_client()
    year_span = datetime.timedelta(365)
    one_day = datetime.timedelta(1)
    start_date = datetime.date.today() - year_span
    end_date = datetime.date.today() - one_day  # yesterday
    date_filter = user_mc.publish_date_query(start_date, end_date)
    results = user_mc.storyCount(solr_query=q, solr_filter=date_filter,
                                 split=True, split_period='day')
    filled = add_missing_dates_to_split_story_counts(results['counts'],
                                                     start_date, end_date)
    results['counts'] = filled
    results['total_story_count'] = sum(
        entry['count'] for entry in results['counts'])
    return results
Exemplo n.º 7
0
def _cached_last_year_split_story_count(q='*'):
    """Fetch daily split story counts for ``q`` over the last 365 days.

    Sources are open to everyone, so there is no need for a user-specific
    cache key. The range runs from 365 days before today through yesterday.
    The counts are passed through ``add_missing_dates_to_split_story_counts``
    for that range, and their sum is stored under ``'total_story_count'``.
    """
    user_mc = user_mediacloud_client()
    days_back = 365
    start_date = datetime.date.today() - datetime.timedelta(days_back)
    # stop at yesterday
    end_date = datetime.date.today() - datetime.timedelta(1)
    results = user_mc.storyCount(
        solr_query=q,
        solr_filter=user_mc.publish_date_query(start_date, end_date),
        split=True,
        split_period='day',
    )
    results['counts'] = add_missing_dates_to_split_story_counts(
        results['counts'], start_date, end_date)
    results['total_story_count'] = sum(
        row['count'] for row in results['counts'])
    return results
Exemplo n.º 8
0
def split_story_count(q='*', last_n_days=None):
    """Return split story counts for ``q`` plus a ``'total_story_count'``.

    Pass ``last_n_days`` (e.g. 365) to restrict results to that many days.
    That range ends one day ago, so the drop-off associated with today's
    stories is not included. With ``last_n_days=None`` no filter is applied,
    and the date range is taken from the first and last returned counts,
    clamped to ``[MC_START_DATE, YESTERDAY]``.
    """
    all_time = last_n_days is None
    fq = None if all_time else "publish_date:[NOW-{}DAY TO NOW-1DAY]".format(last_n_days)
    results = _cached_split_story_counts(q, fq)

    start_date = None
    end_date = None
    daily = results['counts']
    if all_time and len(daily) > 0:
        # if we are getting ALL stories, make sure bad dates don't give us
        # super old / future ones
        earliest = datetime.datetime.strptime(daily[0]['date'],
                                              mc.SENTENCE_PUBLISH_DATE_FORMAT)
        start_date = max(MC_START_DATE, earliest)
        latest = datetime.datetime.strptime(daily[-1]['date'],
                                            mc.SENTENCE_PUBLISH_DATE_FORMAT)
        end_date = min(YESTERDAY, latest)

    results['counts'] = add_missing_dates_to_split_story_counts(daily, start_date, end_date)
    results['total_story_count'] = sum(entry['count'] for entry in results['counts'])
    return results