示例#1
0
def write_topics_for_status(status, filename="topics.csv", n_words=10):
    """
    Fit an online LDA model on the sample for one status and write its topics.

    One of the analysis questions is how different the topics look for
    each status. This helper makes it easy to pull the topics for a
    single status.

    Relies on the module-level names ``vocab``, ``K``, ``D`` and
    ``batchsize`` being defined.

    Args:
        status: Status value handed to
            ``cu.get_sample_data_frame_by_status``.
        filename: Third argument to ``write_topics_csv`` (presumably the
            output path -- confirm against that helper). Defaults to the
            previously hard-coded ``"topics.csv"``; pass a distinct value
            per status to keep the results of several calls apart.
        n_words: Fourth argument to ``write_topics_csv`` (presumably the
            number of words listed per topic -- confirm). Defaults to the
            previously hard-coded ``10``.
    """
    data = cu.get_sample_data_frame_by_status(status)
    # NOTE(review): the trailing arguments look like alpha = eta = 1/K,
    # tau0 = 1024 and kappa = 0.7 from Hoffman's onlineldavb -- confirm.
    lda = OnlineLDA(vocab, K, D, 1.0 / K, 1.0 / K, 1024.0, 0.7)
    make_topic_columns(lda, data, K, D, batchsize)
    write_topics_csv(vocab, lda._lambda, filename, n_words)
示例#2
0
def write_topics_for_status(status, filename='topics.csv', n_words=10):
    """
    Fit an online LDA model on the sample for one status and write its topics.

    One of the analysis questions is how different the topics look for
    each status. This helper makes it easy to pull the topics for a
    single status.

    Relies on the module-level names ``vocab``, ``K``, ``D`` and
    ``batchsize`` being defined.

    Args:
        status: Status value handed to
            ``cu.get_sample_data_frame_by_status``.
        filename: Third argument to ``write_topics_csv`` (presumably the
            output path -- confirm against that helper). Defaults to the
            previously hard-coded ``'topics.csv'``; pass a distinct value
            per status to keep the results of several calls apart.
        n_words: Fourth argument to ``write_topics_csv`` (presumably the
            number of words listed per topic -- confirm). Defaults to the
            previously hard-coded ``10``.
    """
    data = cu.get_sample_data_frame_by_status(status)
    # NOTE(review): the trailing arguments look like alpha = eta = 1/K,
    # tau0 = 1024 and kappa = 0.7 from Hoffman's onlineldavb -- confirm.
    lda = OnlineLDA(vocab, K, D, 1./K, 1./K, 1024., 0.7)
    make_topic_columns(lda, data, K, D, batchsize)
    write_topics_csv(vocab, lda._lambda, filename, n_words)
示例#3
0
def topic_summary_by_status(vocab, K, D, n, limit, batchsize):
    """
    Build one combined topic summary covering every status in ``cu.labels``.

    For each status a fresh ``OnlineLDA`` model is created, run over that
    status' sample via ``make_topic_columns``, and summarised with
    ``make_topic_summary``. Nothing is written to disk here; the caller
    decides what to do with the returned object (e.g. export it to CSV).

    Args:
        vocab: Vocabulary forwarded to ``OnlineLDA`` and
            ``make_topic_summary``.
        K: Forwarded to ``OnlineLDA`` and ``make_topic_columns``; also
            used for the ``1.0 / K`` constructor arguments.
        D: Forwarded to ``OnlineLDA`` and ``make_topic_columns``.
        n: Third argument to ``make_topic_summary`` (presumably the number
            of words per topic -- confirm). ``None`` selects 10, the value
            this function used before it honoured the argument.
        limit: Second argument to ``cu.get_sample_data_frame_by_status``.
        batchsize: Forwarded to ``make_topic_columns``.

    Returns:
        The result of ``pd.concat`` over the per-status summaries, keyed
        by ``cu.labels``.
    """
    # The argument used to be overwritten with a hard-coded 10, silently
    # ignoring the caller; keep 10 only as the fallback for None.
    if n is None:
        n = 10
    pieces = []
    for status in cu.labels:
        data = cu.get_sample_data_frame_by_status(status, limit)
        # A new model per status so the statuses do not share state.
        # NOTE(review): trailing arguments look like alpha = eta = 1/K,
        # tau0 = 1024, kappa = 0.7 from Hoffman's onlineldavb -- confirm.
        lda = OnlineLDA(vocab, K, D, 1.0 / K, 1.0 / K, 1024.0, 0.7)
        make_topic_columns(lda, data, K, D, batchsize)
        pieces.append(make_topic_summary(vocab, lda._lambda, n))
    return pd.concat(pieces, keys=cu.labels)
示例#4
0
def topic_summary_by_status(vocab, K, D, n, limit, batchsize):
    """
    Build one combined topic summary covering every status in ``cu.labels``.

    For each status a fresh ``OnlineLDA`` model is created, run over that
    status' sample via ``make_topic_columns``, and summarised with
    ``make_topic_summary``. Nothing is written to disk here; the caller
    decides what to do with the returned object (e.g. export it to CSV).

    Args:
        vocab: Vocabulary forwarded to ``OnlineLDA`` and
            ``make_topic_summary``.
        K: Forwarded to ``OnlineLDA`` and ``make_topic_columns``; also
            used for the ``1./K`` constructor arguments.
        D: Forwarded to ``OnlineLDA`` and ``make_topic_columns``.
        n: Third argument to ``make_topic_summary`` (presumably the number
            of words per topic -- confirm). ``None`` selects 10, the value
            this function used before it honoured the argument.
        limit: Second argument to ``cu.get_sample_data_frame_by_status``.
        batchsize: Forwarded to ``make_topic_columns``.

    Returns:
        The result of ``pd.concat`` over the per-status summaries, keyed
        by ``cu.labels``.
    """
    # The argument used to be overwritten with a hard-coded 10, silently
    # ignoring the caller; keep 10 only as the fallback for None.
    if n is None:
        n = 10
    pieces = []
    for status in cu.labels:
        data = cu.get_sample_data_frame_by_status(status, limit)
        # A new model per status so the statuses do not share state.
        # NOTE(review): trailing arguments look like alpha = eta = 1/K,
        # tau0 = 1024, kappa = 0.7 from Hoffman's onlineldavb -- confirm.
        lda = OnlineLDA(vocab, K, D, 1./K, 1./K, 1024., 0.7)
        make_topic_columns(lda, data, K, D, batchsize)
        pieces.append(make_topic_summary(vocab, lda._lambda, n))
    return pd.concat(pieces, keys=cu.labels)