def execute():
    print 'Started at ' + get_time() + '... ',
    start_timing()

    # Rebuild the topic/tweet database from scratch for this run.
    client.drop_database(TOPIC_TWEETS_DB_NAME)

    # Take the top entities, ordered by mention count (most frequent first).
    results = entity_results_coll.find(limit=NUMBER_OF_TOP_ENTITIES, no_cursor_timeout=True) \
        .sort([(VALUE + '.' + COUNT, DESCENDING)])

    for result in results:
        tweets = []
        text = []
        lower_entity = result[LOWER_ENTITY]
        entities = result[VALUE][PSEUDONYMS]
        entity_pseudos[lower_entity] = entities

        # Gather every tweet that mentions any pseudonym of the entity and keep
        # the pseudonym with the most tweets as the canonical spelling.
        max_tweets = 0
        for entity in entities:
            c = 0
            for tweet in raw_collection.find({ENTITIES: entity}):
                c += 1
                tweets.append(tweet)
                text.append(tweet[TWEET])
            if c > max_tweets:
                actual_entity[lower_entity] = entity
                max_tweets = c

        # Clean the tweet text and assign the entity to its dominant topic.
        text = clean(text)
        topic_id = get_topic_for_entity(text, tweets)
        entity_topic[lower_entity] = topic_id

    save_to_collection()
    save_model_data()

    print 'Finished'
    stop_timing()
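
# For context, a minimal sketch of the module-level handles and constants the
# function above assumes. The connection details, database/collection names,
# and field-name values below are illustrative guesses, not the project's
# actual configuration.
from pymongo import MongoClient, DESCENDING

client = MongoClient('localhost', 27017)                  # assumed connection
entity_results_coll = client['entities_db']['results']    # assumed DB/collection names
raw_collection = client['tweets_db']['raw_tweets']        # assumed DB/collection names

# Field-name constants referenced above (values are assumptions).
VALUE, COUNT, LOWER_ENTITY, PSEUDONYMS, ENTITIES, TWEET = \
    'value', 'count', 'lower_entity', 'pseudonyms', 'entities', 'tweet'

# Accumulators filled by execute() and persisted by save_to_collection().
entity_pseudos, actual_entity, entity_topic = {}, {}, {}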
def execute():
    start_timing()
    print 'Starting Pre-processing for LDA...',

    # Fetch the raw documents and tokenize/normalise them.
    documents = get_documents()
    tokenized_documents = clean(documents)

    # Build the token -> id mapping and persist it for later LDA runs.
    dictionary = corpora.Dictionary(tokenized_documents)
    dictionary.compactify()
    dictionary.save(DICTIONARY_PATH)

    # Convert each document to a bag-of-words vector and serialise the corpus.
    corpus = [dictionary.doc2bow(doc) for doc in tokenized_documents]
    corpora.MmCorpus.serialize(CORPUS_PATH, corpus)

    print 'Finished'
    stop_timing()

    client.close()
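
# A typical follow-up step (a sketch, not part of the original module): load
# the serialised dictionary and corpus back and fit an LDA model with gensim.
# num_topics, passes, and MODEL_PATH are illustrative assumptions.
from gensim import corpora, models

dictionary = corpora.Dictionary.load(DICTIONARY_PATH)
corpus = corpora.MmCorpus(CORPUS_PATH)

lda = models.LdaModel(corpus, id2word=dictionary, num_topics=20, passes=10)
lda.save(MODEL_PATH)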