def read_csv_file(source_file):
    """Build one aggregated ``Model`` per topic label from a CSV file.

    Every row is read with ``csv.reader``.  Column 1 is split on whitespace
    to obtain the row's topic labels and column 2 is passed to ``Model`` as
    ``text_string``.  The per-row model has ``to_dirichlet()`` called on it
    and is then filed under each of the row's labels.  Afterwards, for every
    label, a fresh ``Model`` accumulates all filed row models with ``+=`` and
    is finalised with ``to_dirichlet()``.

    Args:
        source_file: path of the CSV file; it is opened in ``"rb"`` mode.

    Returns:
        dict mapping each topic label to its aggregated ``Model``.
    """
    models_by_topic = {}
    with open(source_file, "rb") as handle:
        for record in csv.reader(handle):
            row_topics = record[1].split()
            row_model = Model(remove_stopwords=False, text_string=record[2],
                              need_stem=True, input_stemmed=True)
            row_model.to_dirichlet()
            # The same model object is shared by every label of the row.
            for label in row_topics:
                models_by_topic.setdefault(label, []).append(row_model)

    topic_models = {}
    for label, members in models_by_topic.items():
        merged = Model(remove_stopwords=False,
                       need_stem=True, input_stemmed=True)
        for member in members:
            merged += member
        merged.to_dirichlet()
        topic_models[label] = merged

    # A single parenthesised argument prints the same text under Python 2.
    print("Finished Reading models")
    return topic_models
def get_all_words(example_result_tuples):
    """Merge the stemmed models of all example sentences into one model.

    For each entry, a ``Sentence`` is built from ``entry['sentence']`` with
    ``remove_stopwords=True`` and its ``stemmed_model`` is added to the
    accumulator with ``+=``.  ``to_dirichlet()`` is called on the result
    before it is returned.

    Args:
        example_result_tuples: iterable of mappings with a 'sentence' key.

    Returns:
        The accumulated ``Model`` (created as ``Model(True, need_stem=True)``).
    """
    combined = Model(True, need_stem=True)

    for entry in example_result_tuples:
        sentence = Sentence(entry['sentence'], remove_stopwords=True)
        combined += sentence.stemmed_model

    combined.to_dirichlet()
    return combined
def get_all_words(result_tuples):
    """Merge the raw (unstemmed) models of all sentences into one model.

    For each entry, a ``Sentence`` is built from ``entry['sentence']`` with
    ``remove_stopwords=False`` and its ``raw_model`` is added to the
    accumulator with ``+=``.  ``to_dirichlet()`` is called on the result
    before it is returned.

    Args:
        result_tuples: iterable of mappings with a 'sentence' key.

    Returns:
        The accumulated ``Model`` (created as ``Model(False, need_stem=False)``).
    """
    combined = Model(False, need_stem=False)

    for entry in result_tuples:
        text = entry['sentence']
        combined += Sentence(text, remove_stopwords=False).raw_model

    combined.to_dirichlet()
    return combined
def get_all_words(example_result_tuples):
    """Return one model accumulating every example sentence's stemmed model.

    Args:
        example_result_tuples: iterable of mappings with a 'sentence' key.

    Returns:
        A ``Model`` created as ``Model(True, need_stem=True)``, to which the
        ``stemmed_model`` of each ``Sentence`` (built with
        ``remove_stopwords=True``) has been added, and on which
        ``to_dirichlet()`` has been called.
    """
    accumulated = Model(True, need_stem=True)

    for item in example_result_tuples:
        parsed = Sentence(item['sentence'], remove_stopwords=True)
        accumulated += parsed.stemmed_model

    accumulated.to_dirichlet()
    return accumulated
# Example #5
# 0
def get_all_words(result_tuples):
    """Return one model accumulating every sentence's raw model.

    Args:
        result_tuples: iterable of mappings with a 'sentence' key.

    Returns:
        A ``Model`` created as ``Model(False, need_stem=False)``, to which
        the ``raw_model`` of each ``Sentence`` (built with
        ``remove_stopwords=False``) has been added, and on which
        ``to_dirichlet()`` has been called.
    """
    accumulated = Model(False, need_stem=False)

    for item in result_tuples:
        parsed = Sentence(item['sentence'], remove_stopwords=False)
        accumulated += parsed.raw_model

    accumulated.to_dirichlet()
    return accumulated
def get_all_verbs(result_tuples):
    """Build a single model from the 'verb' entry of every result tuple.

    Each verb is converted with ``str`` and passed to ``Model.update`` as a
    one-element ``text_list``.  ``to_dirichlet()`` is called on the model
    before it is returned.

    Args:
        result_tuples: iterable of mappings with a 'verb' key.

    Returns:
        The populated ``Model`` (created as ``Model(False, need_stem=False)``).

    Raises:
        SystemExit: with status 1 when adding a verb raises ``TypeError``.
            The word, its type and the whole tuple are printed first.
    """
    verb_model = Model(False, need_stem=False)

    for single_tuple in result_tuples:
        word = single_tuple['verb']
        # Lemmatization of non-'VB' verbs is disabled in this variant:
        # if single_tuple['verb_label'] != 'VB':
        #     word = WordNetLemmatizer().lemmatize(word,'v')
        try:
            verb_model.update(text_list=[str(word)])
        except TypeError:
            print("Wrong Word!")
            print(word)
            print(type(word))
            print(single_tuple)
            # Exit non-zero: status 0 would report success to the parent
            # process even though the run was aborted.
            sys.exit(1)
    verb_model.to_dirichlet()

    return verb_model
def get_all_verbs(example_result_tuples):
    """Build a single model from the lemmatized verbs of the example tuples.

    A verb whose 'verb_label' is not ``'VB'`` is first lemmatized as a verb
    with ``WordNetLemmatizer``.  Each verb is then converted with ``str``
    and passed to ``Model.update`` as a one-element ``text_list``.
    ``to_dirichlet()`` is called on the model before it is returned.

    Args:
        example_result_tuples: iterable of mappings with 'verb' and
            'verb_label' keys.

    Returns:
        The populated ``Model`` (created as ``Model(True, need_stem=True)``).

    Raises:
        SystemExit: with status 1 when adding a verb raises ``TypeError``.
            The word, its type and the whole tuple are printed first.
    """
    verb_model = Model(True, need_stem=True)
    # One lemmatizer serves the whole loop instead of a new one per verb.
    lemmatizer = WordNetLemmatizer()

    for single_tuple in example_result_tuples:
        word = single_tuple['verb']
        if single_tuple['verb_label'] != 'VB':
            word = lemmatizer.lemmatize(word, 'v')
        try:
            verb_model.update(text_list=[str(word)])
        except TypeError:
            print("Wrong Word!")
            print(word)
            print(type(word))
            print(single_tuple)
            # Exit non-zero: status 0 would report success to the parent
            # process even though the run was aborted.
            sys.exit(1)
    verb_model.to_dirichlet()

    return verb_model
# Example #8
# 0
def get_all_verbs(result_tuples, normalize):
    """Build a single model from the 'verb' entry of every result tuple.

    Each verb is converted with ``str`` and passed to ``Model.update`` as a
    one-element ``text_list``.

    Args:
        result_tuples: iterable of mappings with a 'verb' key.
        normalize: when truthy, ``to_dirichlet()`` is called on the model
            before it is returned; otherwise the model is returned as
            accumulated.

    Returns:
        The populated ``Model`` (created as ``Model(False, need_stem=False)``).

    Raises:
        SystemExit: with status 1 when adding a verb raises ``TypeError``.
            The word, its type and the whole tuple are printed first.
    """
    verb_model = Model(False, need_stem=False)

    for single_tuple in result_tuples:
        word = single_tuple['verb']
        # Lemmatization of non-'VB' verbs is disabled in this variant:
        # if single_tuple['verb_label'] != 'VB':
        #     word = WordNetLemmatizer().lemmatize(word,'v')
        try:
            verb_model.update(text_list=[str(word)])
        except TypeError:
            print("Wrong Word!")
            print(word)
            print(type(word))
            print(single_tuple)
            # Exit non-zero: status 0 would report success to the parent
            # process even though the run was aborted.
            sys.exit(1)
    if normalize:
        verb_model.to_dirichlet()

    return verb_model