def __init__(self, debug=False):
    self.debug = debug
    self.config = load_config("./config/assed_config.json")
    # Word2vec encoder and Keras classifier used to score social messages
    self._encoder = KeyedVectors.load_word2vec_format(
        './pipelines/assed_landslide/ml/encoders/GoogleNews-vectors-negative300.bin',
        binary=True, unicode_errors='ignore', limit=100000)
    self.zero_v = zeros(shape=(300,))
    self.model = keras.models.load_model(
        "./pipelines/assed_landslide/ml/models/tf_model.h5")
    # Database connection; the cursor is refreshed every cursor_refresh seconds
    self.DB_CONN = get_db_connection(self.config)
    self.cursor = self.DB_CONN.cursor()
    self.cursor_timer = time.time()
    self.cursor_refresh = 300
    # Running classification counters
    self.true_counter = 0
    self.false_counter = 0
    self.total_counter = 0
    self.db_insert = 'INSERT INTO ASSED_Social_Events ( \
        social_id, cell, \
        latitude, longitude, timestamp, link, text, location, topic_name, source, valid, streamtype) \
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    self.stream_tracker = {}
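# For context, a minimal sketch of how a word2vec encoder and Keras model like the ones
# loaded above are typically combined to score a short text. The mean-of-vectors encoding
# and the 0.5 threshold are assumptions for illustration, not this repository's exact method.
import numpy as np

def encode_text(encoder, zero_v, text):
    # Average the vectors of in-vocabulary tokens; out-of-vocabulary tokens fall back to zero_v.
    vectors = [encoder[word] if word in encoder else zero_v for word in text.split()]
    if not vectors:
        return zero_v
    return np.mean(vectors, axis=0)

# Hypothetical usage against the attributes set in __init__ above:
#   vec = encode_text(self._encoder, self.zero_v, "landslide blocks highway near town")
#   score = self.model.predict(vec.reshape(1, -1))[0]
#   is_relevant = score > 0.5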
def __init__(self, debug=False):
    self.debug = debug
    self.time = time.time()
    # Redis connection for the location cache
    pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
    self.r = redis.Redis(connection_pool=pool)
    self.timecheck = 600
    self.locations = {}
    self.update_location_store()
    # Stanford NER server used to extract location mentions
    self.NER = Ner(host="localhost", port=9199)
    self.counter = 0
    self.memory = {}
    config = load_config("./config/assed_config.json")
    self.APIKEY = config["APIKEYS"]["googlemaps"]
    self.stream_tracker = {}
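# A minimal sketch of how the NER client created above can be used to pull location
# mentions out of a message, assuming the Ner client is the `sner` package's Stanford NER
# client with its get_entities() call; extract_locations is a hypothetical helper.
from sner import Ner

def extract_locations(tagger, text):
    # get_entities returns (token, tag) pairs; collect contiguous LOCATION tokens.
    locations, current = [], []
    for token, tag in tagger.get_entities(text):
        if tag == "LOCATION":
            current.append(token)
        elif current:
            locations.append(" ".join(current))
            current = []
    if current:
        locations.append(" ".join(current))
    return locations

# e.g. extract_locations(Ner(host="localhost", port=9199), "Landslide near Kathmandu, Nepal")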
def __init__(self, debug=False):
    self.debug = debug
    self.config = load_config("./config/assed_config.json")
    # Database connection; the cursor is refreshed every cursor_refresh seconds
    self.DB_CONN = get_db_connection(self.config)
    self.cursor = self.DB_CONN.cursor()
    self.cursor_timer = time.time()
    self.cursor_refresh = 300
    self.MS_IN_DAYS = 86400000  # milliseconds in a day
    self.true_counter = 0
    self.unk = 0
    self.stream_tracker = {}
# This initializes several parts of LITMUS (working directories and MySQL tables).
import os
import argparse
import glob

from utils import db_utils
from utils.file_utils import load_config

parser = argparse.ArgumentParser(description="Initialize sets up various parts of LITMUS")
parser.add_argument("--env", choices=["mysql", "dirs"], help="Environment to set up")
argums = vars(parser.parse_args())

assed_config = load_config('config/assed_config.json')

if argums['env'] == 'dirs':
    # Create the working directories used by the pipelines
    dirs = ['downloads', 'logfiles', 'config', 'redis', 'ml', 'ml/models', 'ml/encoders']
    for directory in dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)

if argums['env'] == 'mysql':
    # Set up MySQL tables (news and everything else)
    db_conn = db_utils.get_db_connection(assed_config)
    for file_ in glob.glob('initialization/mysql/*.SQL'):
        db_utils.run_sql_file(file_, db_conn)
    db_conn.close()
import os
import sys
import multiprocessing

from SocialStreamerSrc.TweetProcess import TweetProcess
from SocialStreamerSrc.KeyServer import KeyServer

# Utils import
from utils.file_utils import load_config
from utils.helper_utils import dict_equal, setup_pid, readable_time, std_flush
import utils.CONSTANTS as CONSTANTS

SOCIAL_STREAMER_FIRST_FILE_CHECK = True

if __name__ == '__main__':
    pid_name = os.path.basename(sys.argv[0]).split('.')[0]
    setup_pid(pid_name)

    # Set up configOriginal dict
    configOriginal = load_config(CONSTANTS.TOPIC_CONFIG_PATH)

    # Build a manager entry for each configured social streamer
    StreamerManager = {}
    for _streamer_ in configOriginal["SocialStreamers"]:
        StreamerManager[_streamer_] = {}
        StreamerManager[_streamer_]["name"] = configOriginal["SocialStreamers"][_streamer_]["name"]
        StreamerManager[_streamer_]["type"] = configOriginal["SocialStreamers"][_streamer_]["type"]
        StreamerManager[_streamer_]["apikey_name"] = configOriginal["SocialStreamers"][_streamer_]["apikey"]
        StreamerManager[_streamer_]["apimax"] = configOriginal["SocialStreamers"][_streamer_]["apimax"]
        # Dynamically import the streamer script and keep a handle to its executor class
        _scriptname = configOriginal["SocialStreamers"][_streamer_]["script"]
        moduleImport = __import__("SocialStreamerSrc.%s" % _scriptname, fromlist=[_scriptname])
        StreamerManager[_streamer_]["executor"] = getattr(moduleImport, _scriptname)
import os
import sys
import multiprocessing

from utils.file_utils import load_config
from utils.helper_utils import setup_pid, readable_time, std_flush
import utils.CONSTANTS as CONSTANTS
from SocialStreamFileProcessorSrc.StreamFilesProcessor import StreamFilesProcessor

if __name__ == "__main__":
    # Set up the PID for this process
    pid_name = os.path.basename(sys.argv[0]).split('.')[0]
    setup_pid(pid_name)

    # Load the keywords
    keywordConfig = load_config(CONSTANTS.TOPIC_CONFIG_PATH)
    errorQueue = multiprocessing.Queue()
    messageQueue = multiprocessing.Queue()
    keyStreamConfig = {}

    # For each keyword-language pair, launch a StreamFilesProcessor
    for physicalEvent in keywordConfig['topic_names'].keys():
        for language in keywordConfig['topic_names'][physicalEvent]["languages"]:
            eventLangTuple = (physicalEvent, language)
            keyStreamConfig[eventLangTuple] = {}
            keyStreamConfig[eventLangTuple]['name'] = physicalEvent
            keyStreamConfig[eventLangTuple]['lang'] = language
            keyStreamConfig[eventLangTuple]['keywords'] = keywordConfig['topic_names'][physicalEvent]["languages"][language]
            keyStreamConfig[eventLangTuple]['postpone'] = False
            std_flush(" ".join(["Deploying", str(eventLangTuple), "at", readable_time()]))
            try:
def main():
    local_timer = 0
    refresh_timer = 7200
    sleep_timer = 300
    while True:
        if time.time() - local_timer > refresh_timer:
            local_timer = time.time()
            helper_utils.std_flush("[%s] -- Initializing EventDetection" % helper_utils.readable_time())
            cell_cache = {}

            assed_config = file_utils.load_config("./config/assed_config.json")
            helper_utils.std_flush("[%s] -- Obtained DB Connection" % helper_utils.readable_time())
            DB_CONN = db_utils.get_db_connection(assed_config)
            cursor = DB_CONN.cursor()

            available_streamers = [item for item in assed_config["SocialStreamers"]]
            streamer_results = {}
            helper_utils.std_flush("[%s] -- Available streamers: %s" % (helper_utils.readable_time(), str(available_streamers)))

            # Pull per-cell aggregates for each social streamer
            for _streamer_ in available_streamers:
                helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), _streamer_))
                _query_ = generate_social_query(_streamer_=_streamer_, _topic_="landslide")
                cursor.execute(_query_)
                streamer_results[_streamer_] = cursor.fetchall()
                helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), _streamer_))

            helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), "TRMM"))
            _query_ = generate_trmm_query()
            cursor.execute(_query_)
            trmm_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), "TRMM"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), "USGS"))
            _query_ = generate_usgs_query()
            cursor.execute(_query_)
            usgs_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), "USGS"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), "News"))
            _query_ = generate_news_query()
            cursor.execute(_query_)
            news_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), "News"))
            cursor.close()

            # Scoring -- Twitter-Social: 0.3, Twitter-HDI: 1, News: 3, USGS: 5, TRMM: 1
            helper_utils.std_flush("[%s] -- Generating local cache with scoring:\tSocial-ML - 0.3\tSocial-HDI - 1\tNews - 3\tUSGS - 5\tTRMM - 1" % helper_utils.readable_time())
            for _streamer_ in streamer_results:
                helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), _streamer_))
                for tuple_cell_ in streamer_results[_streamer_]:
                    _cell_ = tuple_cell_[0]
                    if _cell_ not in cell_cache:
                        cell_cache[_cell_] = {}
                    if int(float(tuple_cell_[1])) > 0:
                        cell_cache[_cell_][_streamer_ + "-hdi"] = (int(float(tuple_cell_[1])), float(tuple_cell_[1]))
                    if int(float(tuple_cell_[2]) / 0.34) > 0:
                        cell_cache[_cell_][_streamer_ + "-ml"] = (int(float(tuple_cell_[2]) / 0.34), float(tuple_cell_[2]))

            helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), "TRMM"))
            for tuple_cell_ in trmm_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["TRMM"] = (float(tuple_cell_[1]), float(tuple_cell_[1] * 1))  # 1 <-- TRMM score

            helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), "USGS"))
            for tuple_cell_ in usgs_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["USGS"] = (float(tuple_cell_[1]), float(tuple_cell_[1] * 5))

            helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), "News"))
            for tuple_cell_ in news_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["News"] = (float(tuple_cell_[1]), float(tuple_cell_[1] * 3))

            # Total score per cell
            helper_utils.std_flush("[%s] -- Local cache score total generation" % helper_utils.readable_time())
            for _cell_ in cell_cache:
                cell_cache[_cell_]["total"] = sum([cell_cache[_cell_][item][1] for item in cell_cache[_cell_]])

            pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
            r = redis.Redis(connection_pool=pool)
            helper_utils.std_flush("[%s] -- Connected to Redis" % helper_utils.readable_time())

            # Key push with v1/v2 double buffering:
            #   list_tracker_key tracks which version (v1 or v2) currently holds the data
            #   list_push_key holds the list of cells
            #   list_info_key holds the per-cell score details
            list_tracker_key = "assed:event:detection:multisource:listkey"
            list_push_key = "assed:event:detection:multisource:list"
            list_info_key = "assed:event:detection:multisource:info"

            key_version = r.get(list_tracker_key)
            if key_version is None:
                key_version = "v2"
            else:
                key_version = key_version.decode()
            push_key = 'v1'
            if key_version == 'v1':
                helper_utils.std_flush("[%s] -- v1 key already in effect. Pushing to v2" % helper_utils.readable_time())
                push_key = 'v2'
            else:
                helper_utils.std_flush("[%s] -- v2 key already in effect. Pushing to v1" % helper_utils.readable_time())

            cell_list = [item for item in cell_cache]
            true_list_push_key = list_push_key + ":" + push_key
            helper_utils.std_flush("[%s] -- Deleting existing %s, if any" % (helper_utils.readable_time(), true_list_push_key))
            r.delete(true_list_push_key)
            r.lpush(true_list_push_key, *cell_list)
            helper_utils.std_flush("[%s] -- Pushed cell list to %s" % (helper_utils.readable_time(), true_list_push_key))

            helper_utils.std_flush("[%s] -- Pushing individual cell results" % helper_utils.readable_time())
            cell_counter = 0
            for _cell_ in cell_cache:
                cell_push_contents = json.dumps(cell_cache[_cell_])
                cell_specific_suffix = ":".join(_cell_.split("_"))
                cell_push_key = ":".join([list_info_key, cell_specific_suffix, push_key])
                r.set(cell_push_key, cell_push_contents)
                if cell_counter == 0:
                    helper_utils.std_flush("[%s] -- First push: %s --- %s" % (helper_utils.readable_time(), cell_push_key, cell_push_contents))
                cell_counter += 1
            helper_utils.std_flush("[%s] -- Completed individual cell pushes with %s cells" % (helper_utils.readable_time(), str(cell_counter)))

            # Flip the version tracker so consumers read the freshly pushed buffer
            r.set(list_tracker_key, push_key)
            helper_utils.std_flush("[%s] -- Setting versioning in %s to %s" % (helper_utils.readable_time(), list_tracker_key, push_key))
            helper_utils.std_flush("-------- COMPLETE AT %s ----------\n" % helper_utils.readable_time())
        else:
            # helper_utils.std_flush("Sleeping for %s" % sleep_timer)
            time.sleep(sleep_timer)
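# Minimal sketch of a consumer reading the versioned cells pushed above. The key names are
# the ones used in the push code; the consumer loop itself is illustrative, not from this repo.
import json
import redis

r = redis.Redis(host='localhost', port=6379, db=0)

list_tracker_key = "assed:event:detection:multisource:listkey"
list_push_key = "assed:event:detection:multisource:list"
list_info_key = "assed:event:detection:multisource:info"

version = (r.get(list_tracker_key) or b"v1").decode()        # which buffer (v1/v2) is current
cells = [c.decode() for c in r.lrange(list_push_key + ":" + version, 0, -1)]
for cell in cells:
    cell_key = ":".join([list_info_key] + cell.split("_") + [version])
    scores = json.loads(r.get(cell_key))
    print(cell, scores.get("total"))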
import sys, time, os, json, codecs, traceback
from datetime import datetime
import multiprocessing

from utils.file_utils import load_config
from utils.helper_utils import dict_equal, setup_pid, readable_time, std_flush
import utils.CONSTANTS as CONSTANTS

if __name__ == '__main__':
    pid_name = os.path.basename(sys.argv[0]).split('.')[0]
    setup_pid(pid_name)

    assed_config = load_config(CONSTANTS.ASSED_CONFIG)
    configOriginal = load_config(CONSTANTS.HIGH_CONFIDENCE_CONFIG_PATH)

    HCS_configuration = {}
    errorQueue = multiprocessing.Queue()
    messageQueue = multiprocessing.Queue()

    # Build the configuration entry for each high-confidence stream (HCS) source
    for hcs_type in configOriginal:
        _cfg = configOriginal[hcs_type]
        kwargs = {}
        HCS_configuration[hcs_type] = {}
        HCS_configuration[hcs_type]["name"] = _cfg["name"]
        HCS_configuration[hcs_type]["db_name"] = _cfg["db_name"]
        HCS_configuration[hcs_type]["source_file"] = _cfg["source_file"]
        HCS_configuration[hcs_type]["type"] = _cfg["type"]
        # Scheduled sources also carry a schedule entry
        if HCS_configuration[hcs_type]["type"] == "scheduled":
            HCS_configuration[hcs_type]["schedule"] = _cfg["schedule"]