示例#1
0
文件: joiny.py 项目: seanjh/bkm-ask
def main():
    """Build the artist grouping reports and save one Artsy artist search.

    Loads the snippets, groups them by artist, and hands two payloads to
    ``dump_results``: the grouping itself and the result of
    ``works_mentioned_sorted`` applied to it.  Finally runs an Artsy search
    for "Judy Chicago" and writes the result as pretty-printed JSON.
    """
    artist_grouping = snippets.groupby_artist(snippets.load_snippets())

    dump_results("secret_artist_groups.json", artist_grouping)
    mentions = works_mentioned_sorted(artist_grouping)
    dump_results("secret_mentions.json", mentions)

    search_hits = artsy_search_artist("Judy Chicago")
    with open("secret_artsy_search.json", "w") as handle:
        # Keys are sorted so repeated runs produce a stable, diffable file.
        serialized = json.dumps(search_hits, sort_keys=True, indent=4)
        handle.write(serialized)
示例#2
0
def _filter_noun_groups(groups, cutoff):
    """Return the groups restricted to nouns counted at least *cutoff* times.

    *groups* maps a key to a ``{noun: count}`` dict.  Keys whose nouns are
    all below the cutoff are left out of the result entirely.
    """
    kept = dict()
    for key, nouns in groups.items():
        frequent = {noun: count for noun, count in nouns.items()
                    if count >= cutoff}
        if frequent:
            kept[key] = frequent
    return kept


def _write_json(path, payload):
    """Serialize *payload* as JSON with a two-space indent into *path*."""
    with open(path, 'w') as outfile:
        json.dump(payload, outfile, indent=2)


def main():
    """Count the nouns found in snippet messages and dump three JSON reports.

    For every snippet, each noun extracted from each of its messages is
    tallied overall and passed to ``update_noun_group`` once for the
    snippet's artists (field ``'name'``) and once for its artworks (field
    ``'title'``).  The per-artist and per-artwork tallies are filtered with
    ``CUTOFF`` and written to ``secret_artist_nouns.json`` and
    ``secret_artwork_nouns.json``; the overall tally is written to
    ``secret_all_nouns.json``.
    """
    data = snippets.load_snippets()

    all_nouns = dict()
    nouns_by_artist = dict()
    nouns_by_artwork = dict()

    for snip in data:
        artworks = snippets.get_snippet_artworks(snip)
        # ``get`` yields None when a work has no 'artists' entry; treat that
        # as "no artists" instead of failing on iteration over None.
        artists = [artist for work in artworks
                   for artist in (work.get('artists') or ())]
        for message in extract_snippet_messages_iter(snip):
            for noun in extract_message_nouns(message):
                all_nouns[noun] = all_nouns.get(noun, 0) + 1
                update_noun_group(nouns_by_artist, artists, 'name', noun)
                update_noun_group(nouns_by_artwork, artworks, 'title', noun)

    # ``items()`` is used instead of ``iteritems()`` so the code runs on both
    # Python 2 and Python 3.
    _write_json('secret_artist_nouns.json',
                _filter_noun_groups(nouns_by_artist, CUTOFF))
    _write_json('secret_artwork_nouns.json',
                _filter_noun_groups(nouns_by_artwork, CUTOFF))

    nouns_sorted = sorted(all_nouns.items(),
                          key=noun_count_keyfunc, reverse=True)
    # NOTE(review): this threshold is a hard-coded, strict "> 20" rather than
    # the ">= CUTOFF" used for the grouped reports -- confirm it is intended.
    _write_json('secret_all_nouns.json',
                {noun: count for noun, count in nouns_sorted if count > 20})