예제 #1
0
파일: lda.py 프로젝트: rohittjob/Trends
"""
Apply the LDA algorithm using libraries.
Use the params in the config file.
Save the LDA model.
"""
from os.path import join

from gensim import corpora, models

from utilities.config import NUMBER_OF_TOPICS, NUMBER_OF_PASSES, ALPHA
from utilities.constants import *
from utilities.os_util import get_dir
from utilities.time_management import start_timing, stop_timing, get_time, get_today, get_date_string


# Date stamp embedded in every artifact file name built below.
# NOTE(review): get_today/get_date_string are project helpers; the exact
# date format is defined in utilities.time_management -- confirm there.
TODAY = get_today()
TODAY_STRING = get_date_string(TODAY)

# ROOT is whatever get_dir returns for this file (presumably its containing
# directory -- confirm in utilities.os_util). Each path below has the shape
#   <ROOT>/<DATA_DIR>/<prefix constant><TODAY_STRING><extension constant>
ROOT = get_dir(__file__)
DICTIONARY_PATH = join(ROOT, DATA_DIR, DICTIONARY_PREFIX + TODAY_STRING + DICT)
CORPUS_PATH = join(ROOT, DATA_DIR, CORPUS_PREFIX + TODAY_STRING + MM)
LDA_PATH = join(ROOT, DATA_DIR, LDA_MODEL_PREFIX + TODAY_STRING + LDA)

# Both objects are opened when the module is imported, not lazily, so the
# corpus and dictionary files for TODAY_STRING are expected to exist on disk
# before this module is imported.
CORPUS = corpora.MmCorpus(CORPUS_PATH)
DICTIONARY = corpora.Dictionary.load(DICTIONARY_PATH)


def execute():

    print 'Started LDA at ' + get_time() + '... ',

    start_timing()
예제 #2
0
"""
Perform cleaning
Create and save dictionary and corpus
"""
from os.path import join

from gensim import corpora
from pymongo import MongoClient, DESCENDING

from utilities.cleaner import clean
from utilities.config import NUMBER_OF_TOP_ENTITIES, TWEET_POOLING_SIZE
from utilities.constants import *
from utilities.os_util import get_dir
from utilities.time_management import get_prev_day, get_today, get_date_string, start_timing, stop_timing

# TODAY_STRING stamps the output file names; COLLECTION_DAY_STRING stamps the
# MongoDB collection names. They are currently the same day because
# COLLECTION_DAY is bound to TODAY (the previous-day variant is left in the
# trailing comment on that line).
TODAY = get_today()
TODAY_STRING = get_date_string(TODAY)
COLLECTION_DAY = TODAY  #get_prev_day(TODAY)
COLLECTION_DAY_STRING = get_date_string(COLLECTION_DAY)

# ROOT is whatever get_dir returns for this file (presumably its containing
# directory -- confirm in utilities.os_util). Output paths have the shape
#   <ROOT>/<DATA_DIR>/<prefix constant><TODAY_STRING><extension constant>
ROOT = get_dir(__file__)
DICTIONARY_PATH = join(ROOT, DATA_DIR, DICTIONARY_PREFIX + TODAY_STRING + DICT)
CORPUS_PATH = join(ROOT, DATA_DIR, CORPUS_PREFIX + TODAY_STRING + MM)

# Per-day collection names: raw tweets and entity results share the same
# date suffix.
COLLECTION_NAME = RAW_COLLECTION_PREFIX + COLLECTION_DAY_STRING
RESULTS_COLLECTION_NAME = ENTITY_RESULTS_COLLECTION_PREFIX + COLLECTION_DAY_STRING

# MongoClient() is called with no arguments, i.e. pymongo's default host and
# port. The client is created at import time. Note that the results
# collection is looked up in the same database as the raw tweets (raw_db).
client = MongoClient()
raw_db = client[RAW_TWEETS_DB_NAME]
raw_collection = raw_db[COLLECTION_NAME]
results_coll = raw_db[RESULTS_COLLECTION_NAME]
예제 #3
0
from os.path import join

from pymongo import MongoClient

from utilities.constants import *

from utilities.entities.Collection import Collection
from utilities.mongo import check_or_create_collection, copy_into_collection
from utilities.os_util import get_dir
from utilities.time_management import get_today, get_date_string, start_timing, stop_timing

# ROOT is whatever get_dir returns for this file (presumably its containing
# directory -- confirm in utilities.os_util); JAVASCRIPT_PATH is the
# JAVASCRIPT_DIR entry underneath it.
ROOT = get_dir(__file__)
JAVASCRIPT_PATH = join(ROOT, JAVASCRIPT_DIR)

# Per-day collection names: raw tweets and entity results share the same
# date suffix.
TODAY = get_today()
TODAY_STRING = get_date_string(TODAY)
COLLECTION_NAME = RAW_COLLECTION_PREFIX + TODAY_STRING
RESULTS_COLLECTION_NAME = ENTITY_RESULTS_COLLECTION_PREFIX + TODAY_STRING

# Runs at import time, once per collection this module uses: the temporary
# and dated raw collections, then the temporary and dated results
# collections, all in RAW_TWEETS_DB_NAME.
# NOTE(review): judging by its name, check_or_create_collection creates the
# collection only when it is missing, with the Collection member selecting
# the kind of setup -- confirm in utilities.mongo.
check_or_create_collection(RAW_TWEETS_DB_NAME, TEMP_RAW_COLLECTION_NAME,
                           Collection.TEMP)
check_or_create_collection(RAW_TWEETS_DB_NAME, COLLECTION_NAME, Collection.RAW)
check_or_create_collection(RAW_TWEETS_DB_NAME, TEMP_RESULTS_COLLECTION_NAME,
                           Collection.ENTITY_RESULT)
check_or_create_collection(RAW_TWEETS_DB_NAME, RESULTS_COLLECTION_NAME,
                           Collection.ENTITY_RESULT)

# MongoClient() is called with no arguments, i.e. pymongo's default host and
# port. Handles are taken only after the check_or_create_collection calls
# above have run.
client = MongoClient()
db = client[RAW_TWEETS_DB_NAME]
coll = db[COLLECTION_NAME]
temp_raw = db[TEMP_RAW_COLLECTION_NAME]
예제 #4
0
import io
from os.path import join

from gensim import models, corpora
from pymongo import MongoClient, DESCENDING

from utilities.cleaner import clean
from utilities.constants import *
from utilities.config import NUMBER_OF_TOP_ENTITIES, NUMBER_OF_TOPICS
from utilities.entities.Collection import Collection
from utilities.mongo import check_or_create_collection, copy_into_collection
from utilities.os_util import get_dir
from utilities.time_management import get_prev_day, start_timing, stop_timing, get_today, get_date_string, get_time, get_differenced_day

# TODAY_STRING stamps the artifact file names; COLLECTION_DAY_STRING stamps
# the MongoDB collection names. They are currently the same day because
# COLLECTION_DAY is bound to TODAY (the previous-day variant is left in the
# trailing comment on that line).
TODAY = get_today()
TODAY_STRING = get_date_string(TODAY)
COLLECTION_DAY = TODAY #get_prev_day(TODAY)
COLLECTION_DAY_STRING = get_date_string(COLLECTION_DAY)

# ROOT is whatever get_dir returns for this file (presumably its containing
# directory -- confirm in utilities.os_util). Artifact paths have the shape
#   <ROOT>/<DATA_DIR>/<prefix constant><TODAY_STRING><extension constant>
# while MODEL_DATA_PATH is the MODEL_DATA_DIR entry directly under ROOT.
ROOT = get_dir(__file__)
DICTIONARY_PATH = join(ROOT, DATA_DIR, DICTIONARY_PREFIX + TODAY_STRING + DICT)
CORPUS_PATH = join(ROOT, DATA_DIR, CORPUS_PREFIX + TODAY_STRING + MM)
LDA_PATH = join(ROOT, DATA_DIR, LDA_MODEL_PREFIX + TODAY_STRING + LDA)
MODEL_DATA_PATH = join(ROOT, MODEL_DATA_DIR)

# All three artifacts are loaded when the module is imported, not lazily, so
# the corpus, dictionary and LDA model files for TODAY_STRING are expected to
# exist on disk before this module is imported.
CORPUS = corpora.MmCorpus(CORPUS_PATH)
DICTIONARY = corpora.Dictionary.load(DICTIONARY_PATH)
LDA_MODEL = models.LdaModel.load(LDA_PATH)

# Per-day collection names: raw tweets and entity results share the same
# date suffix.
COLLECTION_NAME = RAW_COLLECTION_PREFIX + COLLECTION_DAY_STRING
RESULTS_COLLECTION_NAME = ENTITY_RESULTS_COLLECTION_PREFIX + COLLECTION_DAY_STRING