Exemplo n.º 1
0
def normalized_and_story_count(q, fq, open_q):
    """Return the story count for ``q`` next to the count for ``open_q``.

    Both lookups share the filter query ``fq``.

    Returns:
        dict: ``'total'`` holds the ``'count'`` field looked up for ``q`` and
        ``'normalized_total'`` holds the ``'count'`` field looked up for
        ``open_q``.
    """
    api_key = base_cache.api_key()
    matching_count = _cached_total_story_count(api_key, q, fq)['count']
    overall_count = _cached_total_story_count(api_key, open_q, fq)['count']
    return {'total': matching_count, 'normalized_total': overall_count}
Exemplo n.º 2
0
def normalized_and_story_split_count(q, fq, open_q):
    """Merge the per-date story counts for ``q`` with those for ``open_q``.

    One entry is produced for every date reported for ``open_q``. Dates that
    have no entry in the ``q`` results get a count of 0.

    Returns:
        dict with three keys:
          * ``'counts'``: list of per-date dicts sorted by ``'date'``, each
            holding ``'date'`` (passed through ``trimSolrDate``),
            ``'total_count'`` (count for ``open_q``), ``'count'`` (count for
            ``q``) and ``'ratio'`` (``count / total_count``, or 0 when either
            value is 0).
          * ``'total'``: sum of all per-date counts for ``q``.
          * ``'normalized_total'``: sum of all per-date counts for ``open_q``.
    """
    mc_api_key = base_cache.api_key()
    data = cached_story_split_count(mc_api_key, q, fq)
    all_stories = cached_story_split_count(mc_api_key, open_q, fq)

    # Index the matching counts by date once instead of rescanning the whole
    # list for every day. setdefault keeps the FIRST entry seen for a date,
    # which is the entry the previous list scan (matching[0]) selected.
    # NOTE(review): assumes the 'date' values are hashable (e.g. strings).
    count_by_date = {}
    for entry in data['counts']:
        count_by_date.setdefault(entry['date'], entry['count'])

    counts = []
    for day in all_stories['counts']:
        matched_count = count_by_date.get(day['date'], 0)
        if matched_count == 0 or day['count'] == 0:
            # Avoid dividing by zero and report "no share" for empty days.
            ratio = 0
        else:
            ratio = float(matched_count) / float(day['count'])
        counts.append({
            'date': trimSolrDate(day['date']),
            'total_count': day['count'],
            'count': matched_count,
            'ratio': ratio,
        })

    return {
        'counts': sorted(counts, key=itemgetter('date')),
        'total': sum(day['count'] for day in data['counts']),
        'normalized_total': sum(day['count'] for day in all_stories['counts']),
    }
Exemplo n.º 3
0
def tag_set_coverage(total_q, subset_q, fq):
    """Report what fraction of the ``total_q`` stories also match ``subset_q``.

    Both counts are looked up with the same filter query ``fq``.

    Returns:
        dict: ``'totals'`` is the story count for ``total_q``, ``'counts'`` is
        the story count for ``subset_q`` and ``'coverage_percentage'`` is
        ``counts / totals`` as a float, or 0 when ``totals`` is zero.
    """
    api_key = base_cache.api_key()
    coverage = {
        'totals': _cached_total_story_count(api_key, total_q, fq)['count'],
        'counts': _cached_total_story_count(api_key, subset_q, fq)['count'],
    }
    # Compare by value, not identity: `is 0` only works when the interpreter
    # happens to reuse the same int object, and never matches e.g. 0.0.
    if coverage['totals'] == 0:
        coverage['coverage_percentage'] = 0
    else:
        coverage['coverage_percentage'] = (
            float(coverage['counts']) / float(coverage['totals']))
    return coverage
Exemplo n.º 4
0
def story_list_page(q,
                    fq,
                    last_processed_stories_id=None,
                    stories_per_page=1000,
                    sort=mc.SORT_PROCESSED_STORIES_ID):
    """Fetch one page of stories for ``q``/``fq``.

    Every argument is forwarded unchanged, after the API key, to
    ``_cached_story_list_page``, whose result is returned as-is.
    """
    api_key = base_cache.api_key()
    page = _cached_story_list_page(
        api_key, q, fq, last_processed_stories_id, stories_per_page, sort)
    return page
Exemplo n.º 5
0
def _most_used_tags(q, fq, tag_sets_id):
    """Return the top tags used in stories matching the query, minus bad themes.

    Tags are requested from ``_cached_most_used_tags`` with a sample size of
    1000 for the given ``tag_sets_id``. Any tag whose ``'tags_id'`` is
    rejected by ``is_bad_theme`` is left out of the result.

    Returns:
        list: a new list of the remaining tag dicts, in their original order.
    """
    api_key = base_cache.api_key()
    tags = _cached_most_used_tags(api_key, q, fq, tag_sets_id, 1000)
    # Filter out bogus NYT tags. Build a new list rather than calling
    # tags.remove() inside a loop over tags: removing while iterating skips
    # the element that follows each removal, so adjacent bad tags slipped
    # through. This also leaves the list handed back by the lookup untouched.
    return [t for t in tags if not is_bad_theme(t['tags_id'])]
Exemplo n.º 6
0
def tag_set_coverage(total_q, subset_q, fq):
    """Compute how much of the ``total_q`` story set is covered by ``subset_q``.

    Both story counts are fetched with the same filter query ``fq``.

    Returns:
        dict: ``'totals'`` (story count for ``total_q``), ``'counts'`` (story
        count for ``subset_q``) and ``'coverage_percentage'`` (``counts``
        divided by ``totals`` as a float, or 0 when ``totals`` is zero).
    """
    api_key = base_cache.api_key()
    totals = _cached_total_story_count(api_key, total_q, fq)['count']
    counts = _cached_total_story_count(api_key, subset_q, fq)['count']
    coverage = {
        'totals': totals,
        'counts': counts,
    }
    # Use value equality for the zero guard; an identity check (`is 0`)
    # relies on interpreter int caching and misses zero values such as 0.0,
    # which would then raise ZeroDivisionError below.
    coverage['coverage_percentage'] = (
        0 if totals == 0 else float(counts) / float(totals))
    return coverage
Exemplo n.º 7
0
def _most_used_tags(q, fq, tag_sets_id):
    """Return the top tags used in stories matching the query, minus bad themes.

    Tags are requested from ``_cached_most_used_tags`` with
    ``TAG_COUNT_SAMPLE_SIZE`` as the sample size for the given
    ``tag_sets_id``. Any tag whose ``'tags_id'`` is rejected by
    ``is_bad_theme`` is left out of the result.

    Returns:
        list: a new list of the remaining tag dicts, in their original order.
    """
    api_key = base_cache.api_key()
    tags = _cached_most_used_tags(api_key, q, fq, tag_sets_id,
                                  TAG_COUNT_SAMPLE_SIZE)
    # Filter out bogus NYT tags into a fresh list. Removing items from `tags`
    # while looping over it skips the element right after each removal, which
    # let consecutive bad tags survive; it also mutated the looked-up list.
    return [t for t in tags if not is_bad_theme(t['tags_id'])]
Exemplo n.º 8
0
def normalized_and_story_split_count(q, open_q, start_date, end_date):
    """Build split story counts for ``q`` and ``open_q`` over a date range.

    ``start_date`` and ``end_date`` are formatted as ``YYYY-MM-DD`` to build
    the filter query. The per-date counts for each query are passed through
    ``add_missing_dates_to_split_story_counts`` before being combined.

    Returns:
        dict: ``'counts'`` is the output of
        ``combined_split_and_normalized_counts``; ``'total'`` and
        ``'normalized_total'`` are the summed ``'count'`` values for ``q`` and
        ``open_q`` respectively.
    """
    day_format = "%Y-%m-%d"
    fq = dates_as_filter_query(start_date.strftime(day_format),
                               end_date.strftime(day_format))
    api_key = base_cache.api_key()

    matching_days = add_missing_dates_to_split_story_counts(
        cached_story_split_count(api_key, q, fq)['counts'],
        start_date, end_date)
    total_days = add_missing_dates_to_split_story_counts(
        cached_story_split_count(api_key, open_q, fq)['counts'],
        start_date, end_date)

    return {
        'counts': combined_split_and_normalized_counts(matching_days,
                                                       total_days),
        'total': sum(day['count'] for day in matching_days),
        'normalized_total': sum(day['count'] for day in total_days),
    }
Exemplo n.º 9
0
def tag_set(tag_sets_id):
    """Look up the tag set with id ``tag_sets_id`` via ``_cached_tag_set``."""
    api_key = base_cache.api_key()
    return _cached_tag_set(api_key, tag_sets_id)
Exemplo n.º 10
0
def word_count(q, fq, ngram_size, num_words, sample_size):
    """Return word counts for ``q``/``fq``.

    All arguments are forwarded unchanged, after the API key, to
    ``_cached_word_count``.
    """
    return _cached_word_count(
        base_cache.api_key(), q, fq, ngram_size, num_words, sample_size)
Exemplo n.º 11
0
def story_list_page(q, fq, last_processed_stories_id=None, stories_per_page=1000, sort=mc.SORT_PROCESSED_STORIES_ID):
    """Return one page of stories matching ``q``/``fq``.

    The arguments are handed, after the API key and in the same order, to
    ``_cached_story_list_page``.
    """
    api_key = base_cache.api_key()
    return _cached_story_list_page(
        api_key,
        q,
        fq,
        last_processed_stories_id,
        stories_per_page,
        sort,
    )
Exemplo n.º 12
0
def normalized_and_story_count(q, fq, open_q):
    """Count stories for ``q`` and for the normalizing query ``open_q``.

    Returns:
        dict: ``'total'`` is the ``'count'`` value looked up for ``q`` and
        ``'normalized_total'`` is the ``'count'`` value looked up for
        ``open_q``; both lookups use the filter query ``fq``.
    """
    key = base_cache.api_key()
    total = _cached_total_story_count(key, q, fq)['count']
    normalized_total = _cached_total_story_count(key, open_q, fq)['count']
    return {
        'total': total,
        'normalized_total': normalized_total,
    }
Exemplo n.º 13
0
def story_count(q, fq):
    """Return the result of ``_cached_total_story_count`` for ``q``/``fq``."""
    return _cached_total_story_count(base_cache.api_key(), q, fq)
Exemplo n.º 14
0
def tag_set(tag_sets_id):
    """Return what ``_cached_tag_set`` yields for the id ``tag_sets_id``."""
    key = base_cache.api_key()
    result = _cached_tag_set(key, tag_sets_id)
    return result
Exemplo n.º 15
0
def word_count(q, fq, ngram_size, num_words, sample_size):
    """Fetch word counts for the stories matching ``q``/``fq``.

    ``ngram_size``, ``num_words`` and ``sample_size`` are passed straight
    through to ``_cached_word_count`` together with the API key.
    """
    lookup_args = (q, fq, ngram_size, num_words, sample_size)
    return _cached_word_count(base_cache.api_key(), *lookup_args)
Exemplo n.º 16
0
def story_count(q, fq):
    """Look up the total story count for ``q`` under the filter ``fq``.

    The value returned by ``_cached_total_story_count`` is handed back
    unmodified.
    """
    key = base_cache.api_key()
    count_result = _cached_total_story_count(key, q, fq)
    return count_result