예제 #1
0
파일: lda.py 프로젝트: rohittjob/Trends
"""
Apply the LDA algorithm using libraries.
Use the params in the config file.
Save the LDA model.
"""
from os.path import join

from gensim import corpora, models

from utilities.config import NUMBER_OF_TOPICS, NUMBER_OF_PASSES, ALPHA
from utilities.constants import *
from utilities.os_util import get_dir
from utilities.time_management import start_timing, stop_timing, get_time, get_today, get_date_string


# Date stamp for this run; TODAY_STRING is embedded in every artifact file
# name built below.
TODAY = get_today()
TODAY_STRING = get_date_string(TODAY)

# Artifact paths: <ROOT>/<DATA_DIR>/<prefix><date string><extension>.
# DATA_DIR, the *_PREFIX names and the DICT / MM / LDA suffixes are not
# defined in this snippet -- presumably they come from the star import of
# utilities.constants (TODO confirm).
ROOT = get_dir(__file__)
DICTIONARY_PATH = join(ROOT, DATA_DIR, DICTIONARY_PREFIX + TODAY_STRING + DICT)
CORPUS_PATH = join(ROOT, DATA_DIR, CORPUS_PREFIX + TODAY_STRING + MM)
LDA_PATH = join(ROOT, DATA_DIR, LDA_MODEL_PREFIX + TODAY_STRING + LDA)

# NOTE(review): both objects are constructed at import time from today's
# corpus and dictionary paths, so importing this module presumably requires
# those files to exist already -- confirm against the pipeline order.
CORPUS = corpora.MmCorpus(CORPUS_PATH)
DICTIONARY = corpora.Dictionary.load(DICTIONARY_PATH)


def execute():

    print 'Started LDA at ' + get_time() + '... ',
예제 #2
0
"""
import os
from os.path import join

import pytz
from pymongo import MongoClient

from utilities.config import DAY_START, APPROXIMATION_RANGE, NUMBER_OF_TOPICS
from utilities.constants import *
from utilities.os_util import get_dir, get_files_in_dir
from utilities.time_management import get_today, get_prev_day, get_datetime_from_string, get_next_day, \
    get_date_time_string, convert_datetime_to_local, localize_datetime, get_differenced_day

# get_dir is applied three times to this file's path -- presumably this walks
# three directory levels up to the project root (confirm in utilities.os_util).
PROJECT_ROOT = get_dir(get_dir(get_dir(__file__)))
# Directory holding the TSV files: PROJECT_ROOT/WEBSITE_DIR/STATIC_DIR/TSV_DIR.
TSV_DIR_PATH = join(PROJECT_ROOT, WEBSITE_DIR, STATIC_DIR, TSV_DIR)
# NOTE(review): the -16 offset is hard-coded; the trailing comment keeps the
# earlier get_prev_day(get_today()) variant. Presumably a temporary override --
# confirm which start date is intended.
START_DATE = get_differenced_day(get_today(), -16) #get_prev_day(get_today())
# Python 2 print statement; runs as a side effect every time the module is
# imported.
print START_DATE
# MongoClient() is called with no arguments, so pymongo's default connection
# settings apply; the database used is the one named 'tweets'.
client = MongoClient()
topics_db = client['tweets']

# Module-level list, empty at import; where it is filled is not visible in
# this snippet.
entities = []


def remove_previous_data():
    """Delete the files that get_files_in_dir(TSV_DIR_PATH, TSV) reports.

    Each returned name is joined onto TSV_DIR_PATH before removal, so the
    helper is expected to yield names relative to that directory. Errors
    raised by os.remove are not caught here.
    """
    for stale_name in get_files_in_dir(TSV_DIR_PATH, TSV):
        stale_path = join(TSV_DIR_PATH, stale_name)
        os.remove(stale_path)


def init_writer(tid):
    filename = TOPIC_FILE_PREFIX + str(tid) + TSV
예제 #3
0
from bson.code import Code
from os.path import join

from pymongo import MongoClient

from utilities.constants import *

from utilities.entities.Collection import Collection
from utilities.mongo import check_or_create_collection, copy_into_collection
from utilities.os_util import get_dir
from utilities.time_management import get_today, get_date_string, start_timing, stop_timing

# Paths derived from this file's location; JAVASCRIPT_DIR is not defined in
# this snippet (presumably from utilities.constants -- TODO confirm).
ROOT = get_dir(__file__)
JAVASCRIPT_PATH = join(ROOT, JAVASCRIPT_DIR)

# Per-day collection names: a fixed prefix followed by today's date string.
TODAY = get_today()
TODAY_STRING = get_date_string(TODAY)
COLLECTION_NAME = RAW_COLLECTION_PREFIX + TODAY_STRING
RESULTS_COLLECTION_NAME = ENTITY_RESULTS_COLLECTION_PREFIX + TODAY_STRING

# These four calls run at import time, all against RAW_TWEETS_DB_NAME: the
# temp and dated raw collections, then the temp and dated results collections.
# NOTE(review): judging by its name, check_or_create_collection creates the
# collection when it is missing -- confirm in utilities.mongo.
check_or_create_collection(RAW_TWEETS_DB_NAME, TEMP_RAW_COLLECTION_NAME,
                           Collection.TEMP)
check_or_create_collection(RAW_TWEETS_DB_NAME, COLLECTION_NAME, Collection.RAW)
check_or_create_collection(RAW_TWEETS_DB_NAME, TEMP_RESULTS_COLLECTION_NAME,
                           Collection.ENTITY_RESULT)
check_or_create_collection(RAW_TWEETS_DB_NAME, RESULTS_COLLECTION_NAME,
                           Collection.ENTITY_RESULT)

# MongoClient() is called with no arguments (pymongo's default connection
# settings); db is the same database the calls above targeted, and coll is
# today's raw collection within it.
client = MongoClient()
db = client[RAW_TWEETS_DB_NAME]
coll = db[COLLECTION_NAME]
"""
import os
from os.path import join

import pytz
from pymongo import MongoClient

from utilities.config import DAY_START, APPROXIMATION_RANGE, NUMBER_OF_TOPICS
from utilities.constants import *
from utilities.os_util import get_dir, get_files_in_dir
from utilities.time_management import get_today, get_prev_day, get_datetime_from_string, get_next_day, \
    get_date_time_string, convert_datetime_to_local, localize_datetime, get_differenced_day

# get_dir is applied three times to this file's path -- presumably this walks
# three directory levels up to the project root (confirm in utilities.os_util).
PROJECT_ROOT = get_dir(get_dir(get_dir(__file__)))
# Directory holding the TSV files: PROJECT_ROOT/WEBSITE_DIR/STATIC_DIR/TSV_DIR.
TSV_DIR_PATH = join(PROJECT_ROOT, WEBSITE_DIR, STATIC_DIR, TSV_DIR)
# NOTE(review): the -16 offset is hard-coded; the trailing comment keeps the
# earlier get_prev_day(get_today()) variant. Presumably a temporary override --
# confirm which start date is intended.
START_DATE = get_differenced_day(get_today(), -16)  #get_prev_day(get_today())
# Python 2 print statement; runs as a side effect every time the module is
# imported.
print START_DATE
# MongoClient() is called with no arguments, so pymongo's default connection
# settings apply; the database used is the one named 'tweets'.
client = MongoClient()
topics_db = client['tweets']

# Module-level list, empty at import; where it is filled is not visible in
# this snippet.
entities = []


def remove_previous_data():
    """Delete the files that get_files_in_dir(TSV_DIR_PATH, TSV) reports.

    Each returned name is joined onto TSV_DIR_PATH before removal, so the
    helper is expected to yield names relative to that directory. Errors
    raised by os.remove are not caught here.
    """
    for stale_name in get_files_in_dir(TSV_DIR_PATH, TSV):
        stale_path = join(TSV_DIR_PATH, stale_name)
        os.remove(stale_path)


def init_writer(tid):
    filename = TOPIC_FILE_PREFIX + str(tid) + TSV