def total_purge(words):
    """Gather the per-year "Counts" column of the pruned word list.

    For every year from 2002 through 2012 the LDA model stored in that
    year's hard-coded results directory is loaded, ``words`` is passed
    through ``getLikes.pruneWordsList`` against it, and the ``"Counts"``
    column of the result is kept under the year (as a string) as its name.

    Args:
        words: Word collection forwarded unchanged to
            ``getLikes.pruneWordsList``.

    Returns:
        The ``pandas.DataFrame`` built from the list of per-year columns.
    """
    cols = []
    for year in range(2002, 2013):
        # Written as a call so the progress line is valid on Python 3 and
        # still prints the bare year on Python 2.
        print(year)
        modelDir = 'Z:\\ermunds\\results\\%d 20topics' % year
        modelName = '%d 20topics' % year
        dirs = gslib.LDAdirs(modelName, modelDir)
        # Only the third item returned by loadStuff is needed here.
        _, _, lda = gslib.loadStuff(dirs)
        words_df = getLikes.pruneWordsList(words, lda)
        col = words_df["Counts"]
        # The column name is what identifies the year in the final frame.
        col.name = str(year)
        cols.append(col)
    stats = pandas.DataFrame(cols)
    return stats
def total_purge(words):
    """Build a frame of yearly "Counts" columns for ``words``.

    Loops over the years 2002..2012, loads the LDA model saved under the
    hard-coded per-year results directory, prunes ``words`` against that
    model with ``getLikes.pruneWordsList`` and collects the ``"Counts"``
    column, renamed to the year.

    Args:
        words: Words handed as-is to ``getLikes.pruneWordsList``.

    Returns:
        ``pandas.DataFrame`` constructed from the collected columns.
    """
    # NOTE(review): `total_purge` is also defined earlier in this file; this
    # later definition rebinds the name. Consider keeping a single copy.
    cols = []
    for year in range(2002, 2013):
        # print() with a single argument behaves the same on Python 2 and 3,
        # unlike the bare print statement, which Python 3 rejects.
        print(year)
        modelDir = 'Z:\\ermunds\\results\\%d 20topics' % year
        modelName = '%d 20topics' % year
        dirs = gslib.LDAdirs(modelName, modelDir)
        # The first two items returned by loadStuff are not used here.
        _, _, lda = gslib.loadStuff(dirs)
        words_df = getLikes.pruneWordsList(words, lda)
        col = words_df["Counts"]
        col.name = str(year)  # label the column with its year
        cols.append(col)
    stats = pandas.DataFrame(cols)
    return stats
    


        

        
        
import genSimLDAlib as gslib
import mess_with_sims





# Scatter-plot script: word counts against the square root of LDA weights
# for the model named below.
indir=r"Z:\ermunds\results\2005 20topics"
modelName="2005+20topics"

# Hand the model name and directory to gslib.LDAdirs, then load via
# gslib.loadStuff; of the three returned items only `lda` is used in this
# section.
dirs = gslib.LDAdirs(modelName,indir)
(dict1,_,lda)=gslib.loadStuff(dirs)

# Word list under study comes from mess_with_sims.BrandsClustered_1.
words = mess_with_sims.BrandsClustered_1
# Disabled alternative source for `words`:
#words = lda.id2word.values()
# NOTE(review): the result is indexed with ["IDs"], .Counts and .index below,
# so it is presumably a pandas DataFrame -- confirm in getLikes.
wordsClean = getLikes.pruneWordsList(words,lda)

# Weights are requested for the "IDs" column of the pruned list.
weights = getLikes.LDAweights(lda,wordsClean["IDs"])

# Imported here, directly before its use in this section.
import matplotlib.pyplot as plt


# y axis: the Counts column; x axis: element-wise square root of the weights.
yy = wordsClean.Counts
xx = np.sqrt(weights)
# `labels` is read only by the annotation loop that follows this section.
labels = wordsClean.index

plt.scatter(xx,yy, marker='.')
'''
for label, x, y in zip(labels, xx, yy):
    plt.annotate(
        label, 
import getLikes

import numpy as np
import genSimLDAlib as gslib
import mess_with_sims


# Plot word counts versus sqrt(LDA weight) for one stored model.
indir = r"Z:\ermunds\results\2005 20topics"
modelName = "2005+20topics"

# gslib.LDAdirs receives the model name and its directory; gslib.loadStuff
# returns three items, of which this section only uses the last (`lda`).
dirs = gslib.LDAdirs(modelName, indir)
(dict1, _, lda) = gslib.loadStuff(dirs)

# The words to analyse are taken from mess_with_sims.BrandsClustered_1.
words = mess_with_sims.BrandsClustered_1
# Disabled alternative source for `words`:
# words = lda.id2word.values()
# NOTE(review): wordsClean is used like a pandas DataFrame below (["IDs"],
# .Counts, .index) -- confirm the return type of pruneWordsList.
wordsClean = getLikes.pruneWordsList(words, lda)

# Look up weights for the entries of the "IDs" column.
weights = getLikes.LDAweights(lda, wordsClean["IDs"])

# Plotting import sits next to the code that needs it.
import matplotlib.pyplot as plt


# Counts go on the y axis, square-rooted weights on the x axis.
yy = wordsClean.Counts
xx = np.sqrt(weights)
# Only the annotation loop after this section reads `labels`.
labels = wordsClean.index

plt.scatter(xx, yy, marker=".")
"""
for label, x, y in zip(labels, xx, yy):
    plt.annotate(
        label, 
# Export script section: relies on `modelDir` and `modelName` being bound
# before this point.

# Values read from topics_marginal.csv inside the model directory.
topicsPs = np.genfromtxt(os.path.join(modelDir, 'topics_marginal.csv'))

# Two word lists read from fixed files.
words = getLikes.words_from_file(r"Z:\ermunds\adjectives.txt")
brands = getLikes.words_from_file(r"Z:\ermunds\brands.txt")

# Only the first item returned by get_divs is kept.
divs, _, _ = getLikes.get_divs(
    words,
    brands,
    indir=modelDir,
    modelName=modelName,
    topics_marginal_probs=topicsPs
)
sims, b, w = getLikes.get_likes(
    words,
    brands,
    indir=modelDir,
    modelName=modelName
)

# Load the model itself so both lists can be pruned against it.
dirs = gslib.LDAdirs(modelName, modelDir)
dict1, _, lda = gslib.loadStuff(dirs)

brands_df = getLikes.pruneWordsList(brands, lda)
words_df = getLikes.pruneWordsList(words, lda)

# Frame of ptopic_given_word output, with columns labelled by the values of
# lda.id2word.
probs = getLikes.ptopic_given_word(lda, topicsPs)
probs_df = pd.DataFrame(probs, columns=lda.id2word.values())
# Brand IDs first, then word IDs; select those columns and relabel them with
# the index of the concatenated series.
alls = pd.concat([brands_df["IDs"], words_df["IDs"]])
x = probs_df[alls]
x.columns = alls.index

# One sheet per result table, written in this order.
writer = pd.ExcelWriter(os.path.join(modelDir, modelName + '_new.xlsx'))
for sheet_name, frame in [
    ('cosine distance', sims),
    ('KL divs', divs),
    ('brands', b),
    ('words', w),
    ('p_topic_given_word', x),
]:
    frame.to_excel(writer, sheet_name=sheet_name)
# Export script section: relies on `words`, `modelDir`, `modelName` and
# `topicsPs` already being bound before this point.
brands = getLikes.words_from_file(r"Z:\ermunds\brands.txt")

# Only the first item returned by get_divs is kept; get_likes supplies the
# three tables written to the first, third and fourth sheets below.
(divs, _, _) = getLikes.get_divs(words,
                                 brands,
                                 indir=modelDir,
                                 modelName=modelName,
                                 topics_marginal_probs=topicsPs)
(sims, b, w) = getLikes.get_likes(words,
                                  brands,
                                  indir=modelDir,
                                  modelName=modelName)

# Load the model so both word lists can be pruned against it.
dirs = gslib.LDAdirs(modelName, modelDir)
(dict1, _, lda) = gslib.loadStuff(dirs)

brands_df = getLikes.pruneWordsList(brands, lda)
words_df = getLikes.pruneWordsList(words, lda)

# Frame of ptopic_given_word output, with columns labelled by the values of
# lda.id2word.
probs = getLikes.ptopic_given_word(lda, topicsPs)
probs_df = pd.DataFrame(probs, columns=lda.id2word.values())
# Brand IDs followed by word IDs pick the columns to export; the columns are
# then relabelled with the index of that concatenated series.
alls = pd.concat([brands_df["IDs"], words_df["IDs"]])
x = probs_df[alls]
x.columns = alls.index

writer = pd.ExcelWriter(os.path.join(modelDir, modelName + '_new.xlsx'))
sims.to_excel(writer, sheet_name='cosine distance')
divs.to_excel(writer, sheet_name='KL divs')
b.to_excel(writer, sheet_name='brands')
w.to_excel(writer, sheet_name='words')
x.to_excel(writer, sheet_name='p_topic_given_word')
# close() writes the workbook to disk; ExcelWriter.save() was removed in
# pandas 2.0, while close() is available on old and new releases alike.
writer.close()