示例#1
0
def plot_phrase_frequencies(phrase_frequencies, output_path):
    """Dump phrase frequencies as JSON and save two histogram PDFs.

    Four files are written into ``output_path``:

    * ``phrase_frequencies_sorted`` -- the entries returned by
      ``jsonize_phrase_dict``, sorted by their ``'frequency'`` value,
      highest first.
    * ``phrase_frequencies_dict`` -- the same entries in the order
      ``jsonize_phrase_dict`` returned them.
    * ``phrase_frequencies_all.pdf`` -- histogram of all frequencies, using
      the largest frequency as the number of bins.
    * ``phrase_frequencies_top.pdf`` -- histogram restricted to the range
      1 to 10, in 9 bins.

    Args:
        phrase_frequencies: mapping whose values are occurrence counts
            (presumably phrase -> int; the largest value is used as a bin
            count -- confirm against callers).
        output_path: directory that receives the files; it is not created
            here.
    """
    jsonized_frequencies = jsonize_phrase_dict(phrase_frequencies)
    frequencies_descending = sorted(jsonized_frequencies,
                                    key=lambda d: d['frequency'],
                                    reverse=True)
    with open(join(output_path, 'phrase_frequencies_sorted'), 'w') as f:
        f.write(json.dumps(frequencies_descending, indent=4))
    with open(join(output_path, 'phrase_frequencies_dict'), 'w') as f:
        f.write(json.dumps(jsonized_frequencies, indent=4))

    # Materialise the values once: a real list (rather than a Python 3 dict
    # view) is a proper sequence for plt.hist and can be tested for emptiness.
    frequencies = list(phrase_frequencies.values())
    # max() raises ValueError on an empty sequence; fall back to one bin so an
    # empty mapping no longer fails here.
    bin_count = max(frequencies) if frequencies else 1

    plt.figure(0)
    # plt.figure(num) re-activates an already existing figure, so clear it;
    # otherwise a second call would draw on top of the previous histogram.
    plt.clf()
    plt.hist(frequencies, bins=bin_count)
    plt.xlabel('number of occurrences')
    plt.ylabel('number of terms')
    plt.title('Histogram of Term Frequencies')
    plt.savefig(join(output_path, 'phrase_frequencies_all.pdf'), bbox_inches=0)

    plt.figure(1)
    plt.clf()  # same reason as for figure 0
    plt.hist(frequencies, bins=9, range=(1, 10))
    plt.xlabel('number of occurrences')
    plt.ylabel('number of terms')
    plt.title('Histogram of Term Frequencies (few occurrences)')
    plt.savefig(join(output_path, 'phrase_frequencies_top.pdf'), bbox_inches=0)
示例#2
0
def make_heatmap(heatmap, graph_terms):
    """Compute heatmap values for ``heatmap`` and store them on the model.

    Steps, in order:

    1. Set the status to ``'getting document list'``.
    2. Inside a ``ManagedSession``, build the query for the model with
       ``create_query_for_model(..., dirty=False)`` and extract terms of
       type ``heatmap.term_type`` from it.
    3. Flatten the extracted terms and pass them, together with
       ``graph_terms``, to ``calculate_heatmap_values``.
    4. Store the result on ``heatmap.terms`` as a JSON string (via
       ``jsonize_phrase_dict(..., 'intensity')``), set the status to
       ``'heatmap complete'``, set ``heatmap.finished`` and save the model.

    Args:
        heatmap: model object; this function reads ``term_type`` and writes
            ``terms`` and ``finished`` before calling ``save()``.
        graph_terms: passed unchanged to ``calculate_heatmap_values``.

    Returns:
        The value returned by ``calculate_heatmap_values``.

    Raises:
        Exception: any exception raised along the way is re-raised after an
            ``'Error: ...'`` status has been recorded on the model.
    """
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            filtered_query = create_query_for_model(session, heatmap, dirty=False)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
        heatmap_terms = flatten(extracted_terms)
        heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
        heatmap.terms = json.dumps(jsonize_phrase_dict(heatmap_vals, 'intensity'))
        set_status('heatmap complete', model=heatmap)
        heatmap.finished = True
        heatmap.save()
        return heatmap_vals
    except Exception as e:
        set_status('Error: %s' % e, model=heatmap)
        # Bare ``raise`` re-raises the exception being handled with its
        # original traceback; ``raise e`` discards it on Python 2.
        raise
示例#3
0
def make_heatmap(heatmap, graph_terms):
    """Compute heatmap values for ``heatmap`` and store them on the model.

    Steps, in order:

    1. Set the status to ``'getting document list'``.
    2. Inside a ``ManagedSession``, create a query for the heatmap's
       ``author`` and ``institution``, filter it (``dirty=False``, the
       heatmap's ``starting_year``, ``ending_year`` and ``sample_size``),
       and extract terms of type ``heatmap.term_type`` from the result.
    3. Flatten the extracted terms and pass them, together with
       ``graph_terms``, to ``calculate_heatmap_values``.
    4. Store the result on ``heatmap.terms`` as a JSON string (via
       ``jsonize_phrase_dict(..., 'intensity')``), set the status to
       ``'heatmap complete'``, set ``heatmap.finished`` and save the model.

    Args:
        heatmap: model object; this function reads ``author``,
            ``institution``, ``starting_year``, ``ending_year``,
            ``sample_size`` and ``term_type``, and writes ``terms`` and
            ``finished`` before calling ``save()``.
        graph_terms: passed unchanged to ``calculate_heatmap_values``.

    Returns:
        The value returned by ``calculate_heatmap_values``.

    Raises:
        Exception: any exception raised along the way is re-raised after an
            ``'Error: ...'`` status has been recorded on the model.
    """
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            heatmap_query = create_query(session,
                                         author=heatmap.author,
                                         institution=heatmap.institution)
            filtered_query = filter_query(heatmap_query, dirty=False,
                                          starting_year=heatmap.starting_year,
                                          ending_year=heatmap.ending_year,
                                          sample_size=heatmap.sample_size,
                                          model=heatmap)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
        heatmap_terms = flatten(extracted_terms)
        heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
        heatmap.terms = json.dumps(jsonize_phrase_dict(heatmap_vals, 'intensity'))
        set_status('heatmap complete', model=heatmap)
        heatmap.finished = True
        heatmap.save()
        return heatmap_vals
    except Exception as e:
        set_status('Error: %s' % e, model=heatmap)
        # Bare ``raise`` re-raises the exception being handled with its
        # original traceback; ``raise e`` discards it on Python 2.
        raise
示例#4
0
def plot_phrase_frequencies(phrase_frequencies, output_path):
    """Dump phrase frequencies as JSON and save two histogram PDFs.

    Four files are written into ``output_path``:

    * ``phrase_frequencies_sorted`` -- the entries returned by
      ``jsonize_phrase_dict``, sorted by their ``"frequency"`` value,
      highest first.
    * ``phrase_frequencies_dict`` -- the same entries in the order
      ``jsonize_phrase_dict`` returned them.
    * ``phrase_frequencies_all.pdf`` -- histogram of all frequencies, using
      the largest frequency as the number of bins.
    * ``phrase_frequencies_top.pdf`` -- histogram restricted to the range
      1 to 10, in 9 bins.

    Args:
        phrase_frequencies: mapping whose values are occurrence counts
            (presumably phrase -> int; the largest value is used as a bin
            count -- confirm against callers).
        output_path: directory that receives the files; it is not created
            here.
    """
    jsonized_frequencies = jsonize_phrase_dict(phrase_frequencies)
    frequencies_descending = sorted(jsonized_frequencies, key=lambda d: d["frequency"], reverse=True)
    with open(join(output_path, "phrase_frequencies_sorted"), "w") as f:
        f.write(json.dumps(frequencies_descending, indent=4))
    with open(join(output_path, "phrase_frequencies_dict"), "w") as f:
        f.write(json.dumps(jsonized_frequencies, indent=4))

    # Materialise the values once: a real list (rather than a Python 3 dict
    # view) is a proper sequence for plt.hist and can be tested for emptiness.
    frequencies = list(phrase_frequencies.values())
    # max() raises ValueError on an empty sequence; fall back to one bin so an
    # empty mapping no longer fails here.
    bin_count = max(frequencies) if frequencies else 1

    plt.figure(0)
    # plt.figure(num) re-activates an already existing figure, so clear it;
    # otherwise a second call would draw on top of the previous histogram.
    plt.clf()
    plt.hist(frequencies, bins=bin_count)
    plt.xlabel("number of occurrences")
    plt.ylabel("number of terms")
    plt.title("Histogram of Term Frequencies")
    plt.savefig(join(output_path, "phrase_frequencies_all.pdf"), bbox_inches=0)

    plt.figure(1)
    plt.clf()  # same reason as for figure 0
    plt.hist(frequencies, bins=9, range=(1, 10))
    plt.xlabel("number of occurrences")
    plt.ylabel("number of terms")
    plt.title("Histogram of Term Frequencies (few occurrences)")
    plt.savefig(join(output_path, "phrase_frequencies_top.pdf"), bbox_inches=0)