Пример #1
0
import os

from corpus.textExtractor import TextExtractor
from corpus.bowBuilder import BowBuilder
from lda.ldaCalc import LdaCalc

# Driver script for the full pipeline: extract texts, build BOW vectors from
# them, train an LDA model on those vectors, save the model and print its
# topics. Everything runs at import/execution time; there are no functions.
#
# All three stages receive the same cache directory: "cache" under the
# current working directory, so the location depends on where the script is
# launched from.
cache_root = os.path.join(os.getcwd(), "cache")

# start here!
# get text
print("\nextracting text from corpus ------------------------------------\n")
extractor = TextExtractor(cache_root)
# The return value of get_texts() is ignored; the texts are read back from
# extractor.texts below (presumably populated by this call -- TODO confirm).
extractor.get_texts()
# NOTE: persisting the extractor is currently disabled.
# extractor.save()

# convert to BOW vectors
print("\nbuilding BOW vectors from corpus ------------------------------------\n")
bowBuilder = BowBuilder(docs=extractor.texts, cache_dir=cache_root)
bowBuilder.generate_bows()
# Unlike the extractor, the builder is saved after generating its vectors.
bowBuilder.save()

# run the LDA
print("\ntraining LDA model -----------------------------------------------\n")
# Positional arguments: the BOW corpus, the builder's id2word attribute, and
# the cache directory.
# NOTE(review): the parameter names of LdaCalc are not visible here -- verify
# the positional order against its constructor.
lda = LdaCalc(bowBuilder.bowVectorCorpus, bowBuilder.id2word, cache_root)
lda.run_lda()
print("\nsaving LDA model -----------------------------------------------\n")
# The model is saved before the topics are printed.
lda.save()
lda.print_topics()


Пример #2
0
__author__ = 'thomas'

import os
from corpus.bowBuilder import BowBuilder

from lda.ldaCalc import LdaCalc

# Driver script that works from the cache directory instead of raw documents:
# it calls load() on a BowBuilder and on an LdaCalc, prints the LDA topics and
# then calls calc_sims(). Everything runs at import/execution time.
#
# Cache directory is "cache" under the current working directory, so the
# location depends on where the script is launched from.
cache_root = os.path.join(os.getcwd(), "cache")

# No documents are passed to the builder; it only gets the cache directory and
# is then asked to load().
# presumably load() restores state previously written to cache_root -- TODO confirm
bowBuilder = BowBuilder(cache_dir=cache_root)
bowBuilder.load()
bows = bowBuilder.bowVectorCorpus

print("\nLoading LDA model -----------------------------------------------\n")
# Only the BOW corpus and a cache directory (keyword sims_cache_dir) are
# supplied to LdaCalc before load() is called.
lda = LdaCalc(bows=bows, sims_cache_dir=cache_root)
lda.load()
lda.print_topics()

# presumably computes similarities from the loaded model and corpus; the
# result is not used here -- TODO confirm against LdaCalc.calc_sims
lda.calc_sims()
# NOTE: persisting the similarities is currently disabled.
# lda.save_sims()