示例#1
0
def build_and_save_lsi_model():
    """Fit an LSI model on the tokenized sentences, save it, and print its topics.

    Steps, in order:
      1. Wrap ``tokens_generator`` in a ``SentencesIterator`` and build a
         ``Dictionary`` from it.
      2. Convert every sentence to its bag-of-words form and train an
         ``LsiModel`` on the resulting corpus.
      3. Print the model's debug output, save the model to
         ``LSI_MODEL_FILE``, then print each topic index followed by that
         topic's description.

    Takes no arguments and returns ``None``; progress and results are
    written to standard output.

    NOTE(review): the sentence iterable is consumed twice (dictionary
    build, then bag-of-words conversion), so ``SentencesIterator`` is
    presumably re-iterable -- confirm against its definition.
    """
    print('Connecting to the database...')
    sentence_stream = SentencesIterator(tokens_generator)
    dictionary = Dictionary(sentence_stream)

    print('Calculating the LSI model...')
    # The bag-of-words corpus is held fully in memory as a list; it could be
    # turned into a streamed iterable, like the sentence source, if needed.
    bow_vectors = list(map(dictionary.doc2bow, sentence_stream))

    lsi = LsiModel(bow_vectors, id2word=dictionary)
    lsi.print_debug()
    lsi.save(LSI_MODEL_FILE)

    # One row per topic in the matrix returned by get_topics().
    topic_count = lsi.get_topics().shape[0]
    for topic_id in range(topic_count):
        print(topic_id)
        print(lsi.print_topic(topic_id))