def __init__(self, n_topics, corpus, words2vec_ny, words2vec, vocab_ny, alpha=None):
    """Initialise the model's attributes to their starting values.

    Args:
        n_topics: Number of topics; must be an ``int``.
        corpus: Stored unchanged on ``self.corpus``.
        words2vec_ny: Stored unchanged on ``self.words2vec_ny``.
        words2vec: Stored unchanged on ``self.words2vec``; its
            ``vector_size`` attribute is read to set ``self.word_vec_size``.
        vocab_ny: Stored unchanged on ``self.vocab_ny``.
        alpha: Stored unchanged on ``self.alpha``. Defaults to ``None``.

    Raises:
        AssertionError: If ``n_topics`` is not an ``int``. The check runs
            last, after every attribute has already been assigned.
    """
    # --- constructor arguments, stored as given ---
    self.corpus = corpus  # original note here read: corpus.index2doc
    self.n_topics = n_topics
    self.vocab_ny = vocab_ny
    self.alpha = alpha
    self.words2vec = words2vec
    self.words2vec_ny = words2vec_ny

    # --- placeholders; nothing in this constructor fills them in ---
    self.n_dk = None
    self.priors = None

    # --- empty containers (each one is a distinct object) ---
    self.topic_params = defaultdict(dict)
    self.topic_params2 = defaultdict(dict)
    self.vocab2topic = {}
    self.logDeterminant = []
    self.z_dw = []
    self.N_sum = 0

    # --- values derived from arguments / helper objects ---
    self.word_vec_size = words2vec.vector_size
    self.solver = cholesky.Helper()
    self.AVGLD = util.CalculateComplexity()

    assert isinstance(self.n_topics, int), 'n_topic should be an integer'
def __init__(self, num_topics, corpus, word_vector_filepath=None, word_vector_model=None,
             alpha=0.2, outputfile=None, preprocess=False):
    """Initialise the model's attributes to their starting values.

    Args:
        num_topics: Stored unchanged on ``self.numtopics``.
        corpus: Stored unchanged on ``self.corpus``.
        word_vector_filepath: Stored unchanged on ``self.wordvecFP``.
            Defaults to ``None``.
        word_vector_model: Stored unchanged on ``self.wvmodel``.
            Defaults to ``None``.
        alpha: Stored unchanged on ``self.alpha``. Defaults to ``0.2``.
        outputfile: Stored unchanged on ``self.output_file_name``.
            Defaults to ``None``.
        preprocess: Stored unchanged on ``self.preprocess``.
            Defaults to ``False``.
    """
    # --- constructor arguments, stored as given ---
    self.corpus = corpus
    self.numtopics = num_topics
    self.alpha = alpha
    self.wordvecFP = word_vector_filepath
    self.wvmodel = word_vector_model
    self.output_file_name = outputfile
    self.preprocess = preprocess

    # --- placeholders; nothing in this constructor fills them in ---
    self.doc_topic_CT = None
    self.priors = None
    self.word_vec_size = None

    # --- empty containers ---
    self.word_vecs = {}
    self.word_topics = {}
    self.vocab = set()
    self.topic_params = defaultdict(dict)
    self.test_word_topics = defaultdict(list)
    self.test_word_topic_count = defaultdict(int)

    # --- helper object ---
    self.solver = cholesky.Helper()
def __init__(self, num_topics, corpus, word_vector_filepath=None, word_vector_model=None,
             run_name=str(1)):
    """Initialise the model's attributes to their starting values.

    Args:
        num_topics: Stored unchanged on ``self.numtopics``; also the divisor
            used to compute ``self.alpha`` (``20 / num_topics``).
        corpus: Stored unchanged on ``self.corpus``.
        word_vector_filepath: Stored unchanged on ``self.wordvecFP``.
            Defaults to ``None``.
        word_vector_model: Stored unchanged on ``self.wvmodel``.
            Defaults to ``None``.
        run_name: Stored unchanged on ``self.run_name``. Defaults to ``"1"``.

    Raises:
        ZeroDivisionError: If ``num_topics`` is ``0``, from the ``alpha``
            computation.
    """
    # --- constructor arguments, stored as given ---
    self.corpus = corpus
    self.numtopics = num_topics
    self.wordvecFP = word_vector_filepath
    self.wvmodel = word_vector_model
    self.run_name = run_name

    # --- placeholders; nothing in this constructor fills them in ---
    self.doc_topic_CT = None
    self.priors = None
    self.word_vec_size = None

    # --- empty containers ---
    self.word_topics = {}
    self.word_vecs = {}
    self.word_index = {}
    self.doc_word_counts = {}
    self.vocab = set()
    self.topic_params = defaultdict(dict)

    # alpha is tied to the topic count rather than passed in by the caller.
    self.alpha = 20.0 / self.numtopics

    # --- helper object ---
    self.solver = cholesky.Helper()