def write_topics_for_status(status):
    """
    Dump the topics found for a single status to ``topics.csv``.

    Convenience wrapper used when comparing how topics differ between
    statuses: it pulls the sample for ``status`` through
    ``cu.get_sample_data_frame_by_status``, builds a fresh ``OnlineLDA``
    model, passes model and sample to ``make_topic_columns``, and hands the
    model's ``_lambda`` to ``write_topics_csv``.

    ``vocab``, ``K``, ``D`` and ``batchsize`` are not parameters; they are
    read from the enclosing scope.

    :param status: status value forwarded to
        ``cu.get_sample_data_frame_by_status``.
    """
    # Both prior arguments of the model share the same value, 1/K.
    # NOTE(review): the positional arguments presumably follow the
    # onlineldavb order (alpha, eta, tau0, kappa) -- confirm.
    prior = 1.0 / K
    model = OnlineLDA(vocab, K, D, prior, prior, 1024.0, 0.7)

    frame = cu.get_sample_data_frame_by_status(status)
    make_topic_columns(model, frame, K, D, batchsize)

    # NOTE(review): the trailing 10 is presumably the number of words
    # written per topic -- confirm against write_topics_csv.
    write_topics_csv(vocab, model._lambda, "topics.csv", 10)
def write_topics_for_status(status):
    """
    Write the topics for one status out to ``topics.csv``.

    Small macro-like helper for comparing topics across statuses. It loads
    the sample for ``status`` with ``cu.get_sample_data_frame_by_status``,
    creates a new ``OnlineLDA`` model, runs ``make_topic_columns`` on the
    model and sample, and finally passes the model's ``_lambda`` to
    ``write_topics_csv``.

    ``vocab``, ``K``, ``D`` and ``batchsize`` come from the enclosing scope
    rather than from the argument list.

    :param status: status value forwarded to
        ``cu.get_sample_data_frame_by_status``.
    """
    # The two prior arguments are identical: 1/K each.
    # NOTE(review): argument order presumably matches onlineldavb
    # (alpha, eta, tau0, kappa) -- confirm.
    uniform_prior = 1. / K
    topic_model = OnlineLDA(vocab, K, D, uniform_prior, uniform_prior, 1024., 0.7)

    sample = cu.get_sample_data_frame_by_status(status)
    make_topic_columns(topic_model, sample, K, D, batchsize)

    # NOTE(review): 10 is presumably the number of words per topic to
    # write -- confirm against write_topics_csv.
    write_topics_csv(vocab, topic_model._lambda, 'topics.csv', 10)
def topic_summary_by_status(vocab, K, D, n, limit, batchsize):
    """
    Build a topic summary for every status in ``cu.labels``.

    For each status a fresh ``OnlineLDA`` model is created, the status'
    sample is passed through ``make_topic_columns`` together with the model,
    and the model's ``_lambda`` is summarised with ``make_topic_summary``.
    The per-status summaries are then combined so the topics of the
    different categories can be compared side by side.

    This function does not write any file itself; it returns the combined
    summary for the caller to inspect or export.

    :param vocab: vocabulary passed to ``OnlineLDA`` and
        ``make_topic_summary``.
    :param K: passed to ``OnlineLDA`` and ``make_topic_columns``; also sets
        both prior arguments to ``1 / K``.
        NOTE(review): presumably the number of topics -- confirm.
    :param D: passed to ``OnlineLDA`` and ``make_topic_columns``.
        NOTE(review): presumably the corpus size -- confirm.
    :param n: forwarded to ``make_topic_summary``. This value is honoured as
        given; it is no longer overwritten with a hard-coded 10.
    :param limit: forwarded to ``cu.get_sample_data_frame_by_status``.
    :param batchsize: forwarded to ``make_topic_columns``.
    :return: the result of ``pd.concat`` over the per-status summaries,
        keyed by ``cu.labels``.
    """
    pieces = []
    for status in cu.labels:
        data = cu.get_sample_data_frame_by_status(status, limit)
        # A new model per status keeps the statuses independent of each other.
        lda = OnlineLDA(vocab, K, D, 1.0 / K, 1.0 / K, 1024.0, 0.7)
        make_topic_columns(lda, data, K, D, batchsize)
        pieces.append(make_topic_summary(vocab, lda._lambda, n))
    return pd.concat(pieces, keys=cu.labels)
def topic_summary_by_status(vocab, K, D, n, limit, batchsize):
    """
    Build a topic summary for every status in ``cu.labels``.

    For each status a fresh ``OnlineLDA`` model is created, the status'
    sample is passed through ``make_topic_columns`` together with the model,
    and the model's ``_lambda`` is summarised with ``make_topic_summary``.
    The per-status summaries are then combined so the topics of the
    different categories can be compared side by side.

    This function does not write any file itself; it returns the combined
    summary for the caller to inspect or export.

    :param vocab: vocabulary passed to ``OnlineLDA`` and
        ``make_topic_summary``.
    :param K: passed to ``OnlineLDA`` and ``make_topic_columns``; also sets
        both prior arguments to ``1 / K``.
        NOTE(review): presumably the number of topics -- confirm.
    :param D: passed to ``OnlineLDA`` and ``make_topic_columns``.
        NOTE(review): presumably the corpus size -- confirm.
    :param n: forwarded to ``make_topic_summary``. This value is honoured as
        given; it is no longer overwritten with a hard-coded 10.
    :param limit: forwarded to ``cu.get_sample_data_frame_by_status``.
    :param batchsize: forwarded to ``make_topic_columns``.
    :return: the result of ``pd.concat`` over the per-status summaries,
        keyed by ``cu.labels``.
    """
    pieces = []
    for status in cu.labels:
        data = cu.get_sample_data_frame_by_status(status, limit)
        # A new model per status keeps the statuses independent of each other.
        lda = OnlineLDA(vocab, K, D, 1./K, 1./K, 1024., 0.7)
        make_topic_columns(lda, data, K, D, batchsize)
        pieces.append(make_topic_summary(vocab, lda._lambda, n))
    return pd.concat(pieces, keys=cu.labels)