-
Notifications
You must be signed in to change notification settings - Fork 0
/
all_marginals.py
75 lines (63 loc) · 1.96 KB
/
all_marginals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 18 23:10:26 2013
@author: Vasya
"""
# Car brand names, one per line of the literal below.
# splitlines() is used instead of split("\n"): the text ends with a newline,
# and split("\n") would append an empty string '' as a bogus last "brand".
brands = '''Acura
Audi
BMW
Buick
Cadillac
Chevrolet
Chrysler
Dodge
Fiat
Ford
Honda
Hyundai
Infiniti
Jaguar
Kia
Lexus
Lincoln
Mazda
Mercedes
Mitsubishi
Nissan
SAAB
Subaru
Toyota
Volkswagen
Volvo
'''.splitlines()

# Descriptive words, whitespace-separated; the adjacent literals are joined by
# the parser into a single string before split() runs.
words = (
    'Arrogant Authentic Charming Daring Different Distinctive Dynamic '
    'Friendly Fun Healthy Helpful Independent Innovative Intelligent '
    'Kind Leader Obliging Original Prestigious Progressive Reliable '
    'Restrained Rugged Simple Social Stylish Traditional Trendy '
    'Trustworthy Unique'
).split()
import numpy as np
import os
import getLikes
import pandas as pd
import genSimLDAlib as gslib
import marginal_topics_distr as marginal
# For every year 2002..2012: refresh the topic marginals of that year's
# 20-topic model, compute the brand/word tables, and write them all into one
# Excel workbook stored in the model directory.
for year in range(2002, 2013):
    print(year)

    # Per-year model location and name.
    modelDir = 'Z:\\ermunds\\results\\%d 20topics' % year
    modelName = '%d 20topics' % year

    # NOTE(review): compute_and_save presumably writes topics_marginal.csv
    # into modelDir, which is read back on the next line -- confirm.
    marginal.compute_and_save(modelName=modelName, LDAdir=modelDir)
    topicsPs = np.genfromtxt(os.path.join(modelDir, 'topics_marginal.csv'))

    # Only the first element of get_divs' 3-tuple is used; get_likes' three
    # results all end up as workbook sheets below.
    divs, _, _ = getLikes.get_divs(
        words,
        brands,
        indir=modelDir,
        modelName=modelName,
        topics_marginal_probs=topicsPs,
    )
    sims, b, w = getLikes.get_likes(
        words,
        brands,
        indir=modelDir,
        modelName=modelName,
    )

    # Load the model objects for this year; only `lda` is used afterwards.
    dirs = gslib.LDAdirs(modelName, modelDir)
    dict1, _, lda = gslib.loadStuff(dirs)

    brands_df = getLikes.pruneWordsList(brands, lda)
    words_df = getLikes.pruneWordsList(words, lda)

    # Table with one column per entry of lda.id2word.
    probs = getLikes.ptopic_given_word(lda, topicsPs)
    probs_df = pd.DataFrame(probs, columns=lda.id2word.values())

    # Select the columns named by the "IDs" values of the brands and words,
    # then relabel those columns with the index of the concatenated series.
    alls = pd.concat([brands_df["IDs"], words_df["IDs"]])
    x = probs_df[alls]
    x.columns = alls.index

    # One workbook per year; sheets are written in the listed order.
    writer = pd.ExcelWriter(
        os.path.join(modelDir, modelName + '_all_Aug18.xlsx')
    )
    sheets = [
        ('cosine distance', sims),
        ('KL divs', divs),
        ('brands', b),
        ('words', w),
        ('p_topic_given_word', x),
    ]
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name)
    writer.save()