示例#1
0
#!/usr/bin/env python
from wordcount import load_word_counts
import sys

def top_two_word(counts):
    """
    Return the counts of the first two entries of counts.

    counts is a list of (word, count, percentage) tuples. The result is a
    list holding the count field of at most the first two tuples, in
    their original order; it is shorter than two when counts is.
    """
    return [occurrences for (_, occurrences, _) in counts[:2]]


if __name__ == '__main__':
    # Emit a tab-separated table with one row per file named on the
    # command line: the two leading counts and their ratio.
    print("Book\tFirst\tSecond\tRatio")
    for path in sys.argv[1:]:
        first, second = top_two_word(load_word_counts(path))
        # NOTE(review): the book name is cut out of the path by fixed
        # offsets (drop the first 13 and the last 4 characters); this
        # presumably matches a specific directory prefix and extension --
        # confirm against how the script is invoked.
        bookname = path[13:-4]
        ratio = float(first) / second
        print("%s\t%i\t%i\t%.2f" % (bookname, first, second, ratio))

def plot_word_counts(counts, limit=10):
    """
    Given a list of (word, count, percentage) tuples, plot the counts as a
    bar chart titled "Word Counts". Only the first limit tuples are plotted.

    Each bar is labelled on the x axis with its word. The plot is drawn
    with pyplot but is neither shown nor saved here; that is left to the
    caller.
    """
    limited_counts = counts[0:limit]
    word_data = [word for (word, _, _) in limited_counts]
    count_data = [count for (_, count, _) in limited_counts]
    position = np.arange(len(word_data))
    width = 1.0
    # Obtain the axes before drawing anything and route every call through
    # it, so the title, ticks and bars all land on the same axes whether
    # plt.axes() reuses an existing axes or creates a new one.
    ax = plt.axes()
    ax.set_title("Word Counts")
    # Tick labels go at the horizontal middle of each bar.
    ax.set_xticks(position + (width / 2))
    ax.set_xticklabels(word_data)
    # align='edge' pins each bar's left edge at its position, which is what
    # the tick placement above assumes; without it, matplotlib versions
    # that centre bars by default leave the labels on the bars' right edge.
    ax.bar(position, count_data, width, color='b', align='edge')

if __name__ == '__main__':
    # Usage: <script> INPUT_FILE OUTPUT_FILE [LIMIT]
    # OUTPUT_FILE may be the literal word "show" to display the plot
    # instead of writing it to a file. LIMIT defaults to 10.
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    limit = int(sys.argv[3]) if len(sys.argv) > 3 else 10
    plot_word_counts(load_word_counts(input_file), limit)
    if output_file != "show":
        plt.savefig(output_file)
    else:
        plt.show()