def read_csv_file(source_file):
    """Read a CSV file and build one aggregated ``Model`` per topic label.

    For every row, column 1 is split on whitespace to obtain the row's topic
    labels and column 2 is passed as ``text_string`` to a ``Model`` created
    with ``remove_stopwords=False, need_stem=True, input_stemmed=True``.
    ``to_dirichlet()`` is called on that per-row model, which is then filed
    under each of its labels.  Afterwards the models filed under a label are
    added onto a fresh ``Model`` and ``to_dirichlet()`` is called on the sum.

    Args:
        source_file: path of the CSV file to read.

    Returns:
        dict mapping each topic label to its aggregated ``Model``.
    """
    models_by_topic = {}
    # Binary mode is what the Python 2 csv module expects for its input file.
    with open(source_file, "rb") as csv_handle:
        for record in csv.reader(csv_handle):
            labels = record[1].split()
            row_model = Model(remove_stopwords=False, text_string=record[2],
                              need_stem=True, input_stemmed=True)
            row_model.to_dirichlet()
            # The same model object is shared by every label of the row.
            for label in labels:
                models_by_topic.setdefault(label, []).append(row_model)

    aggregated = {}
    for label, members in models_by_topic.items():
        combined = Model(remove_stopwords=False, need_stem=True,
                         input_stemmed=True)
        for member in members:
            combined += member
        combined.to_dirichlet()
        aggregated[label] = combined

    print("Finished Reading models")
    return aggregated
def get_all_words(example_result_tuples):
    """Accumulate the stemmed word models of all example sentences.

    Each element's ``'sentence'`` entry is wrapped in a ``Sentence`` built
    with ``remove_stopwords=True``; that sentence's ``stemmed_model`` is added
    onto one ``Model(True, need_stem=True)``.  ``to_dirichlet()`` is called on
    the total before it is returned.

    Args:
        example_result_tuples: iterable of mappings with a ``'sentence'`` key.

    Returns:
        The accumulated ``Model``.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    vocabulary = Model(True, need_stem=True)
    for entry in example_result_tuples:
        sentence = Sentence(entry['sentence'], remove_stopwords=True)
        vocabulary += sentence.stemmed_model
    vocabulary.to_dirichlet()
    return vocabulary
def get_all_words(result_tuples):
    """Accumulate the raw (unstemmed) word models of all result sentences.

    Each element's ``'sentence'`` entry is wrapped in a ``Sentence`` built
    with ``remove_stopwords=False``; that sentence's ``raw_model`` is added
    onto one ``Model(False, need_stem=False)``.  ``to_dirichlet()`` is called
    on the total before it is returned.

    Args:
        result_tuples: iterable of mappings with a ``'sentence'`` key.

    Returns:
        The accumulated ``Model``.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    vocabulary = Model(False, need_stem=False)
    for entry in result_tuples:
        sentence = Sentence(entry['sentence'], remove_stopwords=False)
        vocabulary += sentence.raw_model
    vocabulary.to_dirichlet()
    return vocabulary
def get_all_words(example_result_tuples):
    """Merge the stemmed models of every example sentence into one ``Model``.

    A ``Sentence`` is created from each element's ``'sentence'`` value with
    stop-word removal switched on, and its ``stemmed_model`` is added to a
    running ``Model(True, need_stem=True)``.  The running model has
    ``to_dirichlet()`` applied once, after all elements were processed.

    Args:
        example_result_tuples: iterable of mappings with a ``'sentence'`` key.

    Returns:
        The merged ``Model``.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    merged = Model(True, need_stem=True)
    for item in example_result_tuples:
        text = item['sentence']
        merged += Sentence(text, remove_stopwords=True).stemmed_model
    merged.to_dirichlet()
    return merged
def get_all_words(result_tuples):
    """Merge the raw models of every result sentence into one ``Model``.

    A ``Sentence`` is created from each element's ``'sentence'`` value with
    stop-word removal switched off, and its ``raw_model`` is added to a
    running ``Model(False, need_stem=False)``.  The running model has
    ``to_dirichlet()`` applied once, after all elements were processed.

    Args:
        result_tuples: iterable of mappings with a ``'sentence'`` key.

    Returns:
        The merged ``Model``.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    merged = Model(False, need_stem=False)
    for item in result_tuples:
        text = item['sentence']
        merged += Sentence(text, remove_stopwords=False).raw_model
    merged.to_dirichlet()
    return merged
def get_all_verbs(result_tuples):
    """Collect the ``'verb'`` entry of every tuple into a single ``Model``.

    Each verb is converted with ``str()`` and fed to ``Model.update`` as a
    one-element ``text_list``.  After all tuples were processed
    ``to_dirichlet()`` is called on the model.

    Args:
        result_tuples: iterable of mappings with a ``'verb'`` key.

    Returns:
        The ``Model(False, need_stem=False)`` holding all verbs.

    Raises:
        SystemExit: with status 1 when handling a verb raises ``TypeError``;
            the offending word, its type and the whole tuple are printed
            first.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    verb_model = Model(False, need_stem=False)
    for single_tuple in result_tuples:
        # Verbs are used exactly as given; this variant does not lemmatize.
        word = single_tuple['verb']
        try:
            verb_model.update(text_list=[str(word)])
        except TypeError:
            print("Wrong Word!")
            print(word)
            print(type(word))
            print(single_tuple)
            # Exit non-zero: status 0 would report success to the caller
            # even though processing was aborted.
            sys.exit(1)
    verb_model.to_dirichlet()
    return verb_model
def get_all_verbs(example_result_tuples):
    """Collect the (lemmatized) verbs of all example tuples into one ``Model``.

    The ``'verb'`` entry of each tuple is taken as-is when its
    ``'verb_label'`` equals ``'VB'``; otherwise it is first passed through
    ``WordNetLemmatizer.lemmatize(word, 'v')``.  The result is converted with
    ``str()`` and fed to ``Model.update`` as a one-element ``text_list``.
    After all tuples were processed ``to_dirichlet()`` is called on the model.

    Args:
        example_result_tuples: iterable of mappings with ``'verb'`` and
            ``'verb_label'`` keys.

    Returns:
        The ``Model(True, need_stem=True)`` holding all verbs.

    Raises:
        SystemExit: with status 1 when handling a verb raises ``TypeError``;
            the offending word, its type and the whole tuple are printed
            first.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    verb_model = Model(True, need_stem=True)
    # Created on first use and then reused, instead of once per tuple.
    lemmatizer = None
    for single_tuple in example_result_tuples:
        word = single_tuple['verb']
        # Presumably 'VB' marks a verb already in base form - confirm
        # against the tag set used by the caller.
        if single_tuple['verb_label'] != 'VB':
            if lemmatizer is None:
                lemmatizer = WordNetLemmatizer()
            word = lemmatizer.lemmatize(word, 'v')
        try:
            verb_model.update(text_list=[str(word)])
        except TypeError:
            print("Wrong Word!")
            print(word)
            print(type(word))
            print(single_tuple)
            # Exit non-zero: status 0 would report success to the caller
            # even though processing was aborted.
            sys.exit(1)
    verb_model.to_dirichlet()
    return verb_model
def get_all_verbs(result_tuples, normalize):
    """Collect the ``'verb'`` entry of every tuple into a single ``Model``.

    Each verb is converted with ``str()`` and fed to ``Model.update`` as a
    one-element ``text_list``.  ``to_dirichlet()`` is called on the finished
    model only when ``normalize`` is truthy.

    Args:
        result_tuples: iterable of mappings with a ``'verb'`` key.
        normalize: when truthy, call ``to_dirichlet()`` on the model before
            returning it.

    Returns:
        The ``Model(False, need_stem=False)`` holding all verbs.

    Raises:
        SystemExit: with status 1 when handling a verb raises ``TypeError``;
            the offending word, its type and the whole tuple are printed
            first.
    """
    # NOTE(review): this name is defined more than once in the visible source;
    # if they share a module, only the last definition is effective.
    verb_model = Model(False, need_stem=False)
    for single_tuple in result_tuples:
        # Verbs are used exactly as given; this variant does not lemmatize.
        word = single_tuple['verb']
        try:
            verb_model.update(text_list=[str(word)])
        except TypeError:
            print("Wrong Word!")
            print(word)
            print(type(word))
            print(single_tuple)
            # Exit non-zero: status 0 would report success to the caller
            # even though processing was aborted.
            sys.exit(1)
    if normalize:
        verb_model.to_dirichlet()
    return verb_model