def graph_present():
    """Show a seaborn bar plot of the 'yea' column against 'name'.

    The records returned by ``load_rep_data()`` are loaded into a
    DataFrame, narrowed to the name/present/yea/nay/absent columns and
    ordered by 'yea' (largest first) before plotting.  The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    columns = ['name', 'present', 'yea', 'nay', 'absent']
    frame = pd.DataFrame(load_rep_data())
    ranked = frame[columns].sort_values(by='yea', ascending=False)
    sns.barplot(x="yea", y="name", data=ranked, label='small')
    plt.show()
def graph_present_by_party():
    """Plot per-party totals of 'present', 'yea' and 'nay' as horizontal bars.

    The records returned by ``load_rep_data()`` are grouped by their
    'party' value, the numeric columns are summed per group, and the
    groups are ordered by their 'present' total (ascending) before being
    drawn.  The figure is displayed with ``plt.show()``; nothing is
    returned.
    """
    frame = pd.DataFrame(load_rep_data())
    # Only numeric columns are aggregated.  The text column 'name' is left
    # out on purpose: summing it per party is meaningless (recent pandas
    # concatenates the strings instead of silently dropping the column),
    # and it was never part of the plot.
    totals = (
        frame[['party', 'present', 'yea', 'nay']]
        .groupby('party')
        .sum()
        .sort_values(by='present', ascending=True)
    )
    # NOTE(review): `ax` is not defined in this function; it is presumably
    # a module-level matplotlib Axes -- confirm it exists before calling.
    totals.plot(kind='barh', ax=ax)
    plt.show()
def make_stat_array_votes():
    """Collect the vote data and per-record metadata into one dict.

    Returns:
        dict with four parallel lists, one entry per record returned by
        ``load_rep_data()``:
          'mat'     -- each record's 'votes' value, unchanged
          'names'   -- display label of the form "<name> (<party>)"
          'sizes'   -- each record's 'present' value
          'parties' -- each record's 'party' value
    """
    data = load_rep_data()
    # The previous version also built np.array(votes) into a local that
    # was never used; 'mat' has always been returned as the plain list,
    # so the dead conversion is dropped and the return value is unchanged.
    return {
        'mat': [rep['votes'] for rep in data],
        'names': [rep['name'] + ' (' + rep['party'] + ')' for rep in data],
        'sizes': [rep['present'] for rep in data],
        'parties': [rep['party'] for rep in data],
    }
def plot_solidarity_index():
    """Display the element-wise sum of every record's 'votes' as a matrix plot.

    The 'votes' values of all records from ``load_rep_data()`` are summed
    along axis 0, the shape of the summed array is printed, and the
    result is shown with ``plt.matshow``.  Nothing is returned.
    """
    votes = [rep['votes'] for rep in load_rep_data()]
    line = np.sum(votes, axis=0)
    # matshow only accepts 2-D input.  When each 'votes' entry is a flat
    # sequence the sum above is 1-D, so promote it to a single-row matrix
    # for display (arrays that are already 2-D pass through unchanged).
    plt.matshow(np.atleast_2d(line))
    # Single-argument print(...) gives the same output under the Python 2
    # print statement and the Python 3 print function.
    print(line.shape)
    plt.show()
def show_votes(n_votes=100):
    """Draw the leading votes of every record as a colour-coded matrix.

    Each row of the matrix is one record from ``load_rep_data()`` and is
    labelled on the y axis with that record's 'name'.  The figure is
    displayed with ``plt.show()``; nothing is returned.

    Args:
        n_votes: how many leading entries of each record's 'votes' to
            show.  Defaults to 100, the value that used to be hard-coded.
    """
    data = load_rep_data()
    mat = np.array([rep['votes'][:n_votes] for rep in data])
    labels = [rep['name'] for rep in data]
    # NOTE(review): `ax` is not defined in this function; it is presumably
    # a module-level matplotlib Axes -- confirm it exists before calling.
    # The colormap is passed by name; plt.cm.get_cmap is deprecated and
    # removed in recent matplotlib releases.
    ax.matshow(mat, cmap='coolwarm', aspect='auto')
    # Pin one tick per row before labelling.  Relying on the default tick
    # locator (with a padding '' label) only lines up when the locator
    # happens to step by exactly 1, which is not the case for many rows.
    ax.set_yticks(np.arange(len(labels)))
    ax.set_yticklabels(labels)
    plt.show()
def text_to_vectors():
    """Build a negated TF-IDF similarity matrix from every record's 'quotes'.

    The lower-cased 'quotes' text of each record from ``load_rep_data()``
    is vectorised with a word-level TF-IDF model (unigrams and bigrams,
    terms must occur in at least 25 documents, L2-normalised rows, the
    module-level ``stop_words`` removed).  The product of that matrix with
    its own transpose is densified and negated.

    Returns:
        dict with
          'mat'     -- dense matrix ``-(X * X.T)``
          'names'   -- display label of the form "<name> (<party>)"
          'sizes'   -- each record's 'present' value
          'parties' -- each record's 'party' value
    """
    records = load_rep_data()

    labels = []
    texts = []
    presence = []
    affiliation = []
    for rec in records:
        labels.append(rec['name'] + ' (' + rec['party'] + ')')
        texts.append(rec['quotes'].lower())
        presence.append(rec['present'])
        affiliation.append(rec['party'])

    vectorizer = TfidfVectorizer(
        min_df=25,
        stop_words=stop_words,
        strip_accents='unicode',
        lowercase=True,
        ngram_range=(1, 2),
        norm='l2',
        smooth_idf=True,
        sublinear_tf=False,
        use_idf=True,
        analyzer='word',
    )
    print('vectorizing ...')
    X = vectorizer.fit_transform(texts)

    # Pairwise dot products between the document rows, negated.
    pairwise = X * X.T
    D = -pairwise.todense()
    return {
        'mat': D,
        'names': labels,
        'sizes': presence,
        'parties': affiliation,
    }