def main():
    """Write the artist grouping, mention, and Artsy search reports.

    Loads the snippets, groups them by artist, dumps the grouping and the
    sorted works-mentioned data through ``dump_results``, then runs an
    Artsy artist search for "Judy Chicago" and saves the result as JSON.
    """
    loaded = snippets.load_snippets()
    by_artist = snippets.groupby_artist(loaded)

    # The two dumps share the same grouping; order of the calls is kept.
    dump_results("secret_artist_groups.json", by_artist)
    mentions = works_mentioned_sorted(by_artist)
    dump_results("secret_mentions.json", mentions)

    search_hits = artsy_search_artist("Judy Chicago")
    with open("secret_artsy_search.json", "w") as handle:
        serialized = json.dumps(search_hits, indent=4, sort_keys=True)
        handle.write(serialized)
def _filter_noun_counts(noun_groups, minimum):
    """Keep only nouns counted at least *minimum* times in each group.

    *noun_groups* maps a group key to a mapping of ``noun -> count``.
    Groups left with no nouns after filtering are omitted entirely.
    """
    filtered = {}
    # .items() (rather than the Python 2-only .iteritems()) runs on both
    # Python 2 and Python 3.
    for group, nouns in noun_groups.items():
        frequent = {
            noun: count for noun, count in nouns.items() if count >= minimum
        }
        if frequent:
            filtered[group] = frequent
    return filtered


def _write_json(path, payload):
    """Serialize *payload* to *path* as JSON with a 2-space indent."""
    with open(path, 'w') as outfile:
        json.dump(payload, outfile, indent=2)


def main():
    """Count nouns in snippet messages and write three JSON reports.

    For every loaded snippet, each noun extracted from each of its messages
    is tallied globally and passed to ``update_noun_group`` for the
    snippet's artists (keyed on ``'name'``) and artworks (keyed on
    ``'title'``).  Output files:

    * ``secret_artist_nouns.json``  - per-artist nouns with count >= CUTOFF
    * ``secret_artwork_nouns.json`` - per-artwork nouns with count >= CUTOFF
    * ``secret_all_nouns.json``     - all nouns with count > 20, in the
      order produced by sorting with ``noun_count_keyfunc`` (descending)
    """
    # Local import so this block does not depend on the file's import list.
    from collections import OrderedDict

    data = snippets.load_snippets()
    all_nouns = {}
    nouns_by_artist = {}
    nouns_by_artwork = {}

    for snip in data:
        artworks = snippets.get_snippet_artworks(snip)
        # ``.get('artists')`` yields None when the key is absent; fall back
        # to an empty tuple so such a work contributes no artists instead of
        # raising TypeError.
        artists = [
            artist
            for work in artworks
            for artist in (work.get('artists') or ())
        ]
        for message in extract_snippet_messages_iter(snip):
            for noun in extract_message_nouns(message):
                all_nouns[noun] = all_nouns.get(noun, 0) + 1
                # NOTE(review): update_noun_group is defined elsewhere;
                # presumably it tallies the noun per artist name / artwork
                # title - confirm against its definition.
                update_noun_group(nouns_by_artist, artists, 'name', noun)
                update_noun_group(nouns_by_artwork, artworks, 'title', noun)

    _write_json(
        'secret_artist_nouns.json',
        _filter_noun_counts(nouns_by_artist, CUTOFF),
    )
    _write_json(
        'secret_artwork_nouns.json',
        _filter_noun_counts(nouns_by_artwork, CUTOFF),
    )

    nouns_sorted = sorted(
        all_nouns.items(), key=noun_count_keyfunc, reverse=True
    )
    # An OrderedDict keeps the sorted order in the emitted JSON; a plain
    # dict would discard it on Python 2 (and Python 3 before 3.7).
    # The global report uses its own fixed threshold (> 20), not CUTOFF.
    frequent_nouns = OrderedDict(
        (noun, count) for noun, count in nouns_sorted if count > 20
    )
    _write_json('secret_all_nouns.json', frequent_nouns)