def plot_phrase_frequencies(phrase_frequencies, output_path):
    """Dump phrase-frequency data as JSON and plot two frequency histograms.

    Writes four files into ``output_path``:

    * ``phrase_frequencies_sorted`` -- JSON list of jsonized entries sorted by
      descending ``frequency``.
    * ``phrase_frequencies_dict``   -- JSON dump of the jsonized entries in
      their original order.
    * ``phrase_frequencies_all.pdf`` -- histogram over the full frequency range.
    * ``phrase_frequencies_top.pdf`` -- histogram restricted to 1-10 occurrences.

    :param phrase_frequencies: mapping of phrase -> occurrence count
        (values are assumed to be positive ints -- TODO confirm with callers
        of jsonize_phrase_dict).
    :param output_path: directory the JSON files and PDFs are written into.
    :returns: None. If ``phrase_frequencies`` is empty, the JSON files are
        still written but no plots are produced (previously ``max()`` would
        raise on an empty sequence).
    """
    jsonized_frequencies = jsonize_phrase_dict(phrase_frequencies)
    frequencies_descending = sorted(jsonized_frequencies,
                                    key=lambda d: d['frequency'],
                                    reverse=True)
    with open(join(output_path, 'phrase_frequencies_sorted'), 'w') as f:
        f.write(json.dumps(frequencies_descending, indent=4))
    with open(join(output_path, 'phrase_frequencies_dict'), 'w') as f:
        f.write(json.dumps(jsonized_frequencies, indent=4))
    # Materialize the view once: on Python 3 a dict_values object is not a
    # sequence, and passing it straight to plt.hist / np.asarray produces a
    # 0-d object array instead of the data.  The original also rebuilt the
    # view separately for each figure.
    frequencies = list(phrase_frequencies.values())
    if not frequencies:
        # Nothing to plot; max(frequencies) below would raise ValueError.
        return
    plt.figure(0)
    # One bin per possible count, so each integer frequency gets its own bar.
    plt.hist(frequencies, bins=max(frequencies))
    plt.xlabel('number of occurrences')
    plt.ylabel('number of terms')
    plt.title('Histogram of Term Frequencies')
    # NOTE(review): bbox_inches=0 is an unusual value (bbox_inches='tight' is
    # the conventional spelling); preserved as-is to keep output identical.
    plt.savefig(join(output_path, 'phrase_frequencies_all.pdf'), bbox_inches=0)
    plt.figure(1)
    # Zoom in on the long tail of rarely-occurring terms (1-10 occurrences).
    plt.hist(frequencies, bins=9, range=(1, 10))
    plt.xlabel('number of occurrences')
    plt.ylabel('number of terms')
    plt.title('Histogram of Term Frequencies (few occurrences)')
    plt.savefig(join(output_path, 'phrase_frequencies_top.pdf'), bbox_inches=0)
def make_heatmap(heatmap, graph_terms):
    """Compute heatmap intensity values for ``heatmap`` against ``graph_terms``.

    Pulls the document set for the heatmap model via
    ``create_query_for_model`` (clean documents only), extracts and flattens
    its terms, computes per-term intensities, stores them on the model as
    JSON, and marks the model finished.

    NOTE(review): this file appears to define ``make_heatmap`` twice; if both
    definitions live in the same module, the later one shadows this one --
    verify which variant callers are meant to get.

    :param heatmap: heatmap model object; read for ``term_type`` and written
        to (``terms``, ``finished``) and saved.
    :param graph_terms: terms of the graph the heatmap is overlaid on
        (passed through to ``calculate_heatmap_values``).
    :returns: the dict of heatmap values keyed by term.
    :raises Exception: re-raises any failure after recording it on the model
        via ``set_status``.
    """
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            # dirty=False: restrict to clean (fully processed) documents.
            filtered_query = create_query_for_model(session, heatmap,
                                                    dirty=False)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
            heatmap_terms = flatten(extracted_terms)
            heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
            heatmap.terms = json.dumps(
                jsonize_phrase_dict(heatmap_vals, 'intensity'))
            set_status('heatmap complete', model=heatmap)
            heatmap.finished = True
            heatmap.save()
            return heatmap_vals
    except Exception as e:
        # Record the failure on the model so the UI/poller can surface it.
        set_status('Error: %s' % e, model=heatmap)
        # Bare raise preserves the original traceback (``raise e`` restarts
        # the traceback from this line).
        raise
def make_heatmap(heatmap, graph_terms):
    """Compute heatmap intensity values for ``heatmap`` against ``graph_terms``.

    Builds the document query from the heatmap's author/institution, filters
    it by year range and sample size (clean documents only), extracts and
    flattens its terms, computes per-term intensities, stores them on the
    model as JSON, and marks the model finished.

    NOTE(review): this file appears to define ``make_heatmap`` twice; if both
    definitions live in the same module, this later one shadows the earlier --
    verify which variant callers are meant to get.

    :param heatmap: heatmap model object; read for ``author``,
        ``institution``, ``starting_year``, ``ending_year``, ``sample_size``
        and ``term_type``; written to (``terms``, ``finished``) and saved.
    :param graph_terms: terms of the graph the heatmap is overlaid on
        (passed through to ``calculate_heatmap_values``).
    :returns: the dict of heatmap values keyed by term.
    :raises Exception: re-raises any failure after recording it on the model
        via ``set_status``.
    """
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            heatmap_query = create_query(session,
                                         author=heatmap.author,
                                         institution=heatmap.institution)
            # dirty=False: restrict to clean (fully processed) documents.
            filtered_query = filter_query(heatmap_query,
                                          dirty=False,
                                          starting_year=heatmap.starting_year,
                                          ending_year=heatmap.ending_year,
                                          sample_size=heatmap.sample_size,
                                          model=heatmap)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
            heatmap_terms = flatten(extracted_terms)
            heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
            heatmap.terms = json.dumps(
                jsonize_phrase_dict(heatmap_vals, 'intensity'))
            set_status('heatmap complete', model=heatmap)
            heatmap.finished = True
            heatmap.save()
            return heatmap_vals
    except Exception as e:
        # Record the failure on the model so the UI/poller can surface it.
        set_status('Error: %s' % e, model=heatmap)
        # Bare raise preserves the original traceback (``raise e`` restarts
        # the traceback from this line).
        raise
def plot_phrase_frequencies(phrase_frequencies, output_path):
    """Serialize phrase frequencies to JSON and render two histograms.

    Output files written under ``output_path``: a descending-sorted JSON list
    ("phrase_frequencies_sorted"), an unsorted JSON dump
    ("phrase_frequencies_dict"), a full-range histogram PDF
    ("phrase_frequencies_all.pdf"), and a 1-10-occurrence histogram PDF
    ("phrase_frequencies_top.pdf").

    NOTE(review): this file appears to define ``plot_phrase_frequencies``
    twice; if both definitions live in the same module, this later one
    shadows the earlier -- verify which variant callers are meant to get.

    :param phrase_frequencies: mapping of phrase -> occurrence count
        (assumed positive ints -- TODO confirm with callers).
    :param output_path: directory the JSON files and PDFs are written into.
    :returns: None. An empty mapping still produces the JSON files but skips
        plotting (previously ``max()`` raised on an empty sequence).
    """
    jsonized_frequencies = jsonize_phrase_dict(phrase_frequencies)
    frequencies_descending = sorted(jsonized_frequencies,
                                    key=lambda d: d["frequency"],
                                    reverse=True)
    with open(join(output_path, "phrase_frequencies_sorted"), "w") as f:
        f.write(json.dumps(frequencies_descending, indent=4))
    with open(join(output_path, "phrase_frequencies_dict"), "w") as f:
        f.write(json.dumps(jsonized_frequencies, indent=4))
    # Build the list once: a Python 3 dict_values view is not a sequence and
    # confuses plt.hist / np.asarray (yields a 0-d object array); the
    # original also rebuilt the view for each figure.
    frequencies = list(phrase_frequencies.values())
    if not frequencies:
        # Nothing to plot; max(frequencies) below would raise ValueError.
        return
    plt.figure(0)
    # One bin per possible count so each integer frequency gets its own bar.
    plt.hist(frequencies, bins=max(frequencies))
    plt.xlabel("number of occurrences")
    plt.ylabel("number of terms")
    plt.title("Histogram of Term Frequencies")
    # NOTE(review): bbox_inches=0 is unusual ('tight' is conventional);
    # preserved as-is to keep output identical.
    plt.savefig(join(output_path, "phrase_frequencies_all.pdf"), bbox_inches=0)
    plt.figure(1)
    # Zoom in on rarely-occurring terms (1-10 occurrences).
    plt.hist(frequencies, bins=9, range=(1, 10))
    plt.xlabel("number of occurrences")
    plt.ylabel("number of terms")
    plt.title("Histogram of Term Frequencies (few occurrences)")
    plt.savefig(join(output_path, "phrase_frequencies_top.pdf"), bbox_inches=0)