def optimize_cluster_size(self, max_cl, file_name, min_cl=10):
        """Record the overall purity for a range of cluster counts.

        For every cluster count ``i`` in ``range(min_cl, max_cl)`` the
        clusterer held in ``self.clust`` is re-run with ``i`` clusters and the
        resulting labelling is scored with ``Presenter.purity``. The scores
        are collected in a DataFrame indexed by cluster count, which is then
        pickled to ``file_name``.

        Args:
            max_cl: Upper bound of the cluster counts to try (exclusive).
            file_name: Path of the file the pickled DataFrame is written to.
            min_cl: First cluster count to try (inclusive). Defaults to 10.
        """
        df = pd.DataFrame(columns=['Overall Purity'])
        for i in range(min_cl, max_cl):
            # n_init is forwarded as-is to the clusterer; presumably the
            # number of initialisations per run -- confirm against its API.
            self.clust.cluster(i, n_init=10)
            # A fresh Presenter per run; purity() populates overall_purity.
            e = Presenter()
            e.purity(self.clust.labels_true,
                     self.clust.labels_pred)
            df.loc[i] = e.overall_purity

        # Write dataframe to file. The context manager guarantees the handle
        # is closed even when pickling fails part-way through.
        with open(file_name, 'wb') as f:
            pickle.dump(df, f)
def main():
    """Plot overall purity against the number of topics of cached clusterers.

    Loads every pickled clusterer found in the ``cached_clusterers``
    directory, scores each one with ``Presenter.purity``, plots the scores
    (ordered by ``n_clusters``) together with a fitted second-degree
    polynomial, and saves the figure as ``Purity.svg`` in the current working
    directory.
    """
    cache_dir = 'cached_clusterers'
    clusterers = []
    for entry in os.listdir(cache_dir):
        # NOTE: pickle.load can execute arbitrary code; the cache directory
        # must only ever contain files this project wrote itself.
        with open(os.path.join(cache_dir, entry), 'rb') as f:
            clusterers.append(pickle.load(f))

    # Keyed by cluster count, so a later clusterer with the same n_clusters
    # replaces an earlier one.
    purities = {}
    for cl in clusterers:
        presenter = Presenter()
        score = presenter.purity(cl.labels_true, cl.labels_pred)
        if score is None:
            # optimize_cluster_size reads the score from the attribute after
            # calling purity(); fall back to it when nothing is returned.
            score = presenter.overall_purity
        purities[cl.n_clusters] = score

    # os.listdir yields entries in arbitrary order; sort by cluster count so
    # the line plot and the fit run left to right along the topic axis.
    df = pd.DataFrame(sorted(purities.items()),
                      columns=['Topics', 'Overall Purity'])

    fig, ax = plt.subplots(1)
    # Plot from the DataFrame (not the Series) so that x='Topics' is honoured
    # and the x axis shows real topic counts instead of row positions.
    df.plot(ax=ax, x='Topics', y='Overall Purity', grid=True,
            legend=True, figsize=(40, 25))

    topics = df['Topics'].values
    val = df['Overall Purity'].values
    # Quadratic least-squares fit over the same x values as the data curve.
    poly = np.poly1d(np.polyfit(topics, val, 2))
    ax.plot(topics, poly(topics), 'r-', label='Fitted Polynomial')

    # Rebuild the legend so it includes the fitted curve added after the
    # pandas plot call.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels)
    ax.set_xlabel('Topics')
    fig.savefig('Purity.svg', format='svg')