"""run lda/plsa""" topic_word, doc_topic = ldaAdd.runLda(lda_m, dic) user_topic, users, users_pic_num = ldaAdd.userTopic(USER_FILE, points, doc_topic) if not (os.path.isfile(ZW_FILE) and os.path.isfile(DZ_FILE)): plsa_topic_word, plsa_doc_topic = plsaAdd.runPlsa(plsa_m, dic, CLUS_WORD_ZERO_FILE, ZW_FILE, DZ_FILE) else: plsa_topic_word, plsa_doc_topic = plsaAdd.loadPlsa(ZW_FILE, DZ_FILE, clus_num, len(dic)) plsa_user_topic, users, users_pic_num = ldaAdd.userTopic( USER_FILE, points, plsa_doc_topic) """trans/clus time, order score""" sc.estTransOrder(points, users, cluster_centers) rm_by_clus = points[:, -2] < sc.clus_k somepoints = [] for x in points[rm_by_clus]: if x[1] in users: somepoints.append(x) somepoints = np.array(somepoints) print 'somepoints:' print len(somepoints) random.seed(100) randpoints = random.sample(somepoints, 15000) randpoints = np.array(randpoints) clus_hr_sort = sc.lmsOfClusHr(users, user_topic, doc_topic, somepoints, [],
# NOTE(review): the write below uses index `i`, so it is the tail of a loop
# whose header lies before the visible chunk; re-indent it to match that loop.
midclus.write(str(points[i, -2]) + ' ')
midclus.close()
#drawGmap.drawLayer(labels2, cluster_centers2, n_clusters_2, loc, 2)
clus_num = n_clusters_2

"""run lda"""
# Reuse the model stored at LDA_FILE when that file exists; otherwise call
# saveLda with the cluster count, dictionary, points and both file paths.
if os.path.isfile(LDA_FILE):
    lda_m = ldaAdd.readLda(LDA_FILE, LDA_ZERO_FILE)
else:
    lda_m = ldaAdd.saveLda(clus_num, dic, points, LDA_FILE, LDA_ZERO_FILE)

topic_word, doc_topic = ldaAdd.runLda(lda_m, dic)
user_topic, users, t = ldaAdd.userTopic(USER_FILE, points, doc_topic)

"""trans/clus time, order score"""
sc.estTransOrder(points, users, cluster_centers)

# Keep the rows whose second-to-last column is below sc.clus_k and whose
# column 1 value appears in `users`.
# NOTE(review): presumably column -2 is a cluster id and column 1 a user id
# -- confirm against the code that builds `points`.
rm_by_clus = points[:, -2] < sc.clus_k
somepoints = np.array([x for x in points[rm_by_clus] if x[1] in users])
print('somepoints:')
print(len(somepoints))

# Fixed seed so the 15000-row sample is repeatable between runs.
# random.sample raises ValueError when fewer than 15000 rows are available.
random.seed(100)
randpoints = np.array(random.sample(somepoints, 15000))

trainpoints = []