def total_purge(words):
    """Collect the per-year "Counts" column for *words* over the yearly models.

    For each year from 2002 through 2012 the "<year> 20topics" model is
    loaded through ``gslib``, *words* is passed through
    ``getLikes.pruneWordsList`` against that model, and the resulting
    ``"Counts"`` column is kept under the year's name.

    Args:
        words: word list handed unchanged to ``getLikes.pruneWordsList``.

    Returns:
        A ``pandas.DataFrame`` built from the collected columns; each
        collected column is named with the year as a string.
    """
    cols = []
    for year in range(2002, 2013):
        # Progress trace. The call form prints the same text on Python 2
        # and is valid syntax on Python 3 (the bare statement is not).
        print(year)
        modelDir = 'Z:\\ermunds\\results\\%d 20topics' % year
        modelName = '%d 20topics' % year
        dirs = gslib.LDAdirs(modelName, modelDir)
        # Only the third element (the LDA model) is needed here.
        _, _, lda = gslib.loadStuff(dirs)
        words_df = getLikes.pruneWordsList(words, lda)
        col = words_df["Counts"]
        col.name = str(year)
        cols.append(col)
    stats = pandas.DataFrame(cols)
    return stats
def total_purge(words):
    """Build a table of "Counts" for *words*, one entry per yearly LDA model.

    Iterates over the years 2002..2012, loads the matching "<year> 20topics"
    model via ``gslib.LDAdirs`` / ``gslib.loadStuff``, prunes *words* against
    it with ``getLikes.pruneWordsList`` and collects the ``"Counts"`` column,
    renamed to the year.

    Args:
        words: word list forwarded as-is to ``getLikes.pruneWordsList``.

    Returns:
        ``pandas.DataFrame`` constructed from the list of per-year columns.
    """
    cols = []
    for year in range(2002, 2013):
        # print(...) with a single argument behaves identically on
        # Python 2 and Python 3; the old ``print year`` statement is a
        # SyntaxError on Python 3.
        print(year)
        modelDir = 'Z:\\ermunds\\results\\%d 20topics' % year
        modelName = '%d 20topics' % year
        dirs = gslib.LDAdirs(modelName, modelDir)
        # loadStuff returns a 3-tuple; only the model (last item) is used.
        _, _, lda = gslib.loadStuff(dirs)
        words_df = getLikes.pruneWordsList(words, lda)
        col = words_df["Counts"]
        col.name = str(year)
        cols.append(col)
    stats = pandas.DataFrame(cols)
    return stats
import genSimLDAlib as gslib import mess_with_sims indir=r"Z:\ermunds\results\2005 20topics" modelName="2005+20topics" dirs = gslib.LDAdirs(modelName,indir) (dict1,_,lda)=gslib.loadStuff(dirs) words = mess_with_sims.BrandsClustered_1 #words = lda.id2word.values() wordsClean = getLikes.pruneWordsList(words,lda) weights = getLikes.LDAweights(lda,wordsClean["IDs"]) import matplotlib.pyplot as plt yy = wordsClean.Counts xx = np.sqrt(weights) labels = wordsClean.index plt.scatter(xx,yy, marker='.') ''' for label, x, y in zip(labels, xx, yy): plt.annotate( label,
import getLikes import numpy as np import genSimLDAlib as gslib import mess_with_sims indir = r"Z:\ermunds\results\2005 20topics" modelName = "2005+20topics" dirs = gslib.LDAdirs(modelName, indir) (dict1, _, lda) = gslib.loadStuff(dirs) words = mess_with_sims.BrandsClustered_1 # words = lda.id2word.values() wordsClean = getLikes.pruneWordsList(words, lda) weights = getLikes.LDAweights(lda, wordsClean["IDs"]) import matplotlib.pyplot as plt yy = wordsClean.Counts xx = np.sqrt(weights) labels = wordsClean.index plt.scatter(xx, yy, marker=".") """ for label, x, y in zip(labels, xx, yy): plt.annotate( label,
# Export similarity / divergence tables for one LDA model to an Excel workbook.
# NOTE: `modelDir` and `modelName` are expected to be defined earlier in the
# script; they are not set in this section.

# Marginal topic probabilities stored next to the model.
topicsPs = np.genfromtxt(os.path.join(modelDir, 'topics_marginal.csv'))

words = getLikes.words_from_file(r"Z:\ermunds\adjectives.txt")
brands = getLikes.words_from_file(r"Z:\ermunds\brands.txt")

# Only the first element of get_divs' result is used (written to 'KL divs').
(divs, _, _) = getLikes.get_divs(words, brands, indir=modelDir,
                                 modelName=modelName,
                                 topics_marginal_probs=topicsPs)
(sims, b, w) = getLikes.get_likes(words, brands, indir=modelDir,
                                  modelName=modelName)

dirs = gslib.LDAdirs(modelName, modelDir)
(dict1, _, lda) = gslib.loadStuff(dirs)

brands_df = getLikes.pruneWordsList(brands, lda)
words_df = getLikes.pruneWordsList(words, lda)

probs = getLikes.ptopic_given_word(lda, topicsPs)
# NOTE(review): this assumes lda.id2word.values() is ordered like the columns
# of `probs` - confirm against getLikes.ptopic_given_word.
probs_df = pd.DataFrame(probs, columns=lda.id2word.values())

# Select the columns for brands followed by words, then relabel them with
# the index of the concatenated "IDs" series.
alls = pd.concat([brands_df["IDs"], words_df["IDs"]])
x = probs_df[alls]
x.columns = alls.index

# The writer must be closed for the workbook to be written out; the `with`
# block guarantees that, even if one of the to_excel calls raises.
with pd.ExcelWriter(os.path.join(modelDir, modelName + '_new.xlsx')) as writer:
    sims.to_excel(writer, sheet_name='cosine distance')
    divs.to_excel(writer, sheet_name='KL divs')
    b.to_excel(writer, sheet_name='brands')
    w.to_excel(writer, sheet_name='words')
    x.to_excel(writer, sheet_name='p_topic_given_word')
# Export similarity / divergence tables for one LDA model to an Excel workbook.
# NOTE: `words`, `topicsPs`, `modelDir` and `modelName` are expected to be
# defined earlier in the script; they are not set in this section.
brands = getLikes.words_from_file(r"Z:\ermunds\brands.txt")

# Only the first element of get_divs' result is used (written to 'KL divs').
(divs, _, _) = getLikes.get_divs(words, brands, indir=modelDir,
                                 modelName=modelName,
                                 topics_marginal_probs=topicsPs)
(sims, b, w) = getLikes.get_likes(words, brands, indir=modelDir,
                                  modelName=modelName)

dirs = gslib.LDAdirs(modelName, modelDir)
(dict1, _, lda) = gslib.loadStuff(dirs)

brands_df = getLikes.pruneWordsList(brands, lda)
words_df = getLikes.pruneWordsList(words, lda)

probs = getLikes.ptopic_given_word(lda, topicsPs)
# NOTE(review): this assumes lda.id2word.values() is ordered like the columns
# of `probs` - confirm against getLikes.ptopic_given_word.
probs_df = pd.DataFrame(probs, columns=lda.id2word.values())

# Select the columns for brands followed by words, then relabel them with
# the index of the concatenated "IDs" series.
alls = pd.concat([brands_df["IDs"], words_df["IDs"]])
x = probs_df[alls]
x.columns = alls.index

# Context manager instead of writer.save(): save() no longer exists in
# pandas >= 2.0, and leaving the `with` block closes (and thereby writes)
# the workbook even if one of the to_excel calls raises.
with pd.ExcelWriter(os.path.join(modelDir, modelName + '_new.xlsx')) as writer:
    sims.to_excel(writer, sheet_name='cosine distance')
    divs.to_excel(writer, sheet_name='KL divs')
    b.to_excel(writer, sheet_name='brands')
    w.to_excel(writer, sheet_name='words')
    x.to_excel(writer, sheet_name='p_topic_given_word')