Example #1
    def __init__(self, debug=False):
        self.debug = debug
        self.config = load_config("./config/assed_config.json")

        self._encoder = KeyedVectors.load_word2vec_format(
            './pipelines/assed_landslide/ml/encoders/GoogleNews-vectors-negative300.bin',
            binary=True,
            unicode_errors='ignore',
            limit=100000)
        self.zero_v = zeros(shape=(300, ))
        self.model = keras.models.load_model(
            "./pipelines/assed_landslide/ml/models/tf_model.h5")

        self.DB_CONN = get_db_connection(self.config)
        self.cursor = self.DB_CONN.cursor()

        self.cursor_timer = time.time()

        self.cursor_refresh = 300
        self.true_counter = 0
        self.false_counter = 0
        self.total_counter = 0

        self.db_insert = 'INSERT INTO ASSED_Social_Events ( \
        social_id, cell, \
        latitude, longitude, timestamp, link, text, location, topic_name, source, valid, streamtype) \
        VALUES (%s,%s,%s,%s,%s,%s, %s, %s,%s, %s, %s, %s)'

        self.stream_tracker = {}
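
A minimal downstream sketch, assuming texts are scored by averaging their word2vec vectors; the helper encode_text below is hypothetical and not part of the original class:

import numpy as np

def encode_text(encoder, text, zero_v):
    # average the 300-d vectors of known tokens; fall back to the zero vector
    vectors = [encoder[w] for w in text.split() if w in encoder]
    return np.mean(vectors, axis=0) if vectors else zero_v

# hypothetical scoring call with the loaded Keras model:
# features = encode_text(self._encoder, text, self.zero_v).reshape(1, -1)
# confidence = float(self.model.predict(features)[0][0])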
Example #2
    def __init__(self, assed_config, root_name, errorQueue, messageQueue, **kwargs):
        multiprocessing.Process.__init__(self)
        # set up DB connections
        self.DB_CONN = get_db_connection(assed_config)

        self.root_name = root_name
        self.errorQueue = errorQueue
        self.messageQueue = messageQueue
        self.cached_list = self.getCachedList()
Example #3
    def __init__(self, debug=False):
        self.debug = debug
        self.config = load_config("./config/assed_config.json")
        self.DB_CONN = get_db_connection(self.config)
        self.cursor = self.DB_CONN.cursor()

        self.cursor_timer = time.time()

        self.cursor_refresh = 300
        self.MS_IN_DAYS = 86400000
        self.true_counter = 0
        self.unk = 0
        self.stream_tracker = {}
Example #4
    def __init__(self, assed_config, root_name, errorQueue, messageQueue,
                 **kwargs):
        multiprocessing.Process.__init__(self)
        # set up DB connections
        self.DB_CONN = get_db_connection(assed_config)
        self.client = NewsApiClient(api_key="f715251d799140f793e63a1aec194920")
        self.root_name = root_name
        self.errorQueue = errorQueue
        self.messageQueue = messageQueue
        # No cached list because we are getting new stuff every day...
        self.config = kwargs["config"]
        self.NER = Ner(host='localhost', port=9199)
        pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
        self.r = redis.Redis(connection_pool=pool)
Example #5
def generate_top_slow_trend(case_ids):

    sql_script = """SELECT start_time, case_id, duration
                    FROM aa_test_stats
                    WHERE case_id in %s"""

    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute(sql_script, (tuple(case_ids),))

    df = DataFrame(cursor.fetchall(), columns=['start_time', 'case_id','duration'])

    fig, ax = plt.subplots()
    labels = []
    # group by the single column so `key` is the scalar case_id
    for key, grp in df.groupby('case_id'):
        color = get_random_color()
        ax = grp.plot(ax=ax, kind='line', x='start_time', y='duration', c=color)
        labels.append(key)
    lines, _ = ax.get_legend_handles_labels()
    ax.legend(lines, labels, loc=2, prop={'size': 5})
    plt.savefig(TREND_CHART)
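
A hypothetical invocation of the function above; the case IDs are placeholders, and the chart is written to the path configured in TREND_CHART:

if __name__ == "__main__":
    # plot duration trends for a handful of slow test cases
    generate_top_slow_trend(["case_001", "case_002", "case_003"])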
Example #6
# This initializes several things...
import argparse
import glob
from utils import db_utils
from utils.file_utils import load_config

parser = argparse.ArgumentParser(description="Initialize sets up various parts of LITMUS")
parser.add_argument("--env",
                    choices=["mysql", "dirs"],
                    help="Environment to setup")

argums = vars(parser.parse_args())
assed_config = load_config('config/assed_config.json')


if argums['env'] == 'dirs':
    import os
    dirs = ['downloads','logfiles', 'config', 'redis', 'ml', 'ml/models', 'ml/encoders']
    for directory in dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)

if argums['env'] == 'mysql':
    #set up mysql stuff (news and everything)
    db_conn = db_utils.get_db_connection(assed_config)
    for file_ in glob.glob('initialization/mysql/*.SQL'):
        db_utils.run_sql_file(file_,db_conn)
    db_conn.close()
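
Hypothetical command-line usage, assuming the script above is saved as initialize.py:

# python initialize.py --env dirs     # create the working directories
# python initialize.py --env mysql    # run every initialization/mysql/*.SQL file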
        
Example #7
    def load_data(self, response, tp_accessorial):
        acy_table = response[response['Product_Mode'] == 'ACY']
        prod_table = response[response['Product_Mode'] != 'ACY']
        query = ""
        sql = ""

        last = int(self.bid_number[-1])
        if self.get_randomizer(last):
            # check available accessorials
            if not acy_table.empty:
                acy_table = acy_table.drop(
                    'SVC_GRP_NR', axis=1)  # remove due to blank causing issues
                acy_table = acy_table.merge(tp_accessorial,
                                            how='inner',
                                            on=[
                                                'MVM_DRC_CD', 'SVC_FEA_TYP_CD',
                                                'SVM_TYP_CD', 'ASY_SVC_TYP_CD',
                                                'PKG_CHA_TYP_CD',
                                                'PKG_ACQ_MTH_TYP_CD'
                                            ])

            # add regular products
            try:

                connection = get_db_connection()
                cursor = connection.cursor()

                # remove accessorial service groups from the product list
                acc_svcs = tp_accessorial.SVC_GRP_NR.unique()
                prod_table = prod_table[~prod_table['SVC_GRP_NR'].isin(acc_svcs)]

                # only update once per service group
                prod_table = prod_table[[
                    'SVC_GRP_NR', 'Incentive_Freight', 'Target_Low',
                    'Target_High'
                ]].drop_duplicates()

                for index, row in prod_table.iterrows():
                    # round the values to 3 decimal places
                    svc_grp_num = row["SVC_GRP_NR"]
                    incentive = round(row["Incentive_Freight"], 3)
                    min_inc = round(row["Target_Low"], 3)
                    max_inc = round(row["Target_High"], 3)

                    query = "UPDATE TNCVCEL " \
                            "SET RCM_NCV_QY = " + str(incentive) + ", NCV_MIN_QY = " + str(
                        min_inc) + ", NCV_MAX_QY = " + str(max_inc) + \
                            " WHERE NVP_BID_NR = '" + str(self.bid_number) + "' AND SVC_GRP_NR = '" + str(svc_grp_num) \
                            + "' AND NCV_DTR_DAT_TYP_CD = 'P'"

                    cursor.execute(query)
                    cursor.execute('COMMIT')

                # close the cursor and connection once, after all rows are updated
                cursor.close()
                connection.close()

                # logger.info(str(prod_table.shape[0]) + " rows available to update to TNCVCEL")
            except Exception as e:
                raise RuntimeError(
                    'Error 4.1: Oracle DB cannot be updated with query: \n' +
                    query + ' with error: ' + str(e))
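
A hedged alternative for the update step above, assuming get_db_connection() returns an Oracle connection that accepts cx_Oracle-style named bind parameters; binding avoids assembling SQL by string concatenation:

update_sql = ("UPDATE TNCVCEL "
              "SET RCM_NCV_QY = :incentive, NCV_MIN_QY = :min_inc, NCV_MAX_QY = :max_inc "
              "WHERE NVP_BID_NR = :bid AND SVC_GRP_NR = :svc_grp AND NCV_DTR_DAT_TYP_CD = 'P'")
cursor.execute(update_sql, {"incentive": incentive, "min_inc": min_inc,
                            "max_inc": max_inc, "bid": str(self.bid_number),
                            "svc_grp": str(svc_grp_num)})
connection.commit()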
Example #8
def main():

    local_timer = 0
    refresh_timer = 7200
    sleep_timer = 300
    while True:
        if time.time() - local_timer > refresh_timer:

            local_timer = time.time()

            helper_utils.std_flush("[%s] -- Initializing EventDetection" %
                                   helper_utils.readable_time())
            cell_cache = {}

            assed_config = file_utils.load_config("./config/assed_config.json")

            helper_utils.std_flush("[%s] -- Obtained DB Connection" %
                                   helper_utils.readable_time())
            DB_CONN = db_utils.get_db_connection(assed_config)
            cursor = DB_CONN.cursor()

            available_streamers = [
                item for item in assed_config["SocialStreamers"]
            ]
            streamer_results = {}
            helper_utils.std_flush(
                "[%s] -- Available streamers: %s" %
                (helper_utils.readable_time(), str(available_streamers)))

            for _streamer_ in available_streamers:
                helper_utils.std_flush(
                    "[%s] -- Generating query for: %s" %
                    (helper_utils.readable_time(), _streamer_))
                _query_ = generate_social_query(_streamer_=_streamer_,
                                                _topic_="landslide")
                cursor.execute(_query_)
                streamer_results[_streamer_] = cursor.fetchall()
                helper_utils.std_flush(
                    "[%s] -- Obtained results for : %s" %
                    (helper_utils.readable_time(), _streamer_))

            helper_utils.std_flush("[%s] -- Generating query for: %s" %
                                   (helper_utils.readable_time(), "TRMM"))
            _query_ = generate_trmm_query()
            cursor.execute(_query_)
            trmm_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained resuts for: %s" %
                                   (helper_utils.readable_time(), "TRMM"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" %
                                   (helper_utils.readable_time(), "USGS"))
            _query_ = generate_usgs_query()
            cursor.execute(_query_)
            usgs_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained resuts for: %s" %
                                   (helper_utils.readable_time(), "USGS"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" %
                                   (helper_utils.readable_time(), "News"))
            _query_ = generate_news_query()
            cursor.execute(_query_)
            news_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained resuts for: %s" %
                                   (helper_utils.readable_time(), "News"))
            cursor.close()

            helper_utils.std_flush(
                "[%s] -- Generating local cache with scoring:\tSocial-ML - 0.3\tSocial-HDI - 1\tNews - 3\tUSGS - 5\tTRMM - 1"
                % helper_utils.readable_time())
            # Scoring -- Twitter-Social: 0.3    Twitter-HDI - 1     News:       3       USGS:   5       TRMM:   1
            for _streamer_ in streamer_results:
                helper_utils.std_flush(
                    "[%s] -- Local caching for %s" %
                    (helper_utils.readable_time(), _streamer_))
                for tuple_cell_ in streamer_results[_streamer_]:
                    _cell_ = tuple_cell_[0]
                    if _cell_ not in cell_cache:
                        cell_cache[_cell_] = {}
                    if int(float(tuple_cell_[1])) > 0:
                        cell_cache[_cell_][_streamer_ + "-hdi"] = (int(
                            float(tuple_cell_[1])), float(tuple_cell_[1]))
                    if int(float(tuple_cell_[2]) / 0.34) > 0:
                        cell_cache[_cell_][_streamer_ + "-ml"] = (int(
                            float(tuple_cell_[2]) / 0.34), float(
                                tuple_cell_[2]))

            helper_utils.std_flush("[%s] -- Local caching for %s" %
                                   (helper_utils.readable_time(), "TRMM"))
            for tuple_cell_ in trmm_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["TRMM"] = (float(tuple_cell_[1]),
                                              float(tuple_cell_[1] * 1)
                                              )  # 1 <-- TRMM score

            helper_utils.std_flush("[%s] -- Local caching for %s" %
                                   (helper_utils.readable_time(), "USGS"))
            for tuple_cell_ in usgs_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["USGS"] = (float(tuple_cell_[1]),
                                              float(tuple_cell_[1] * 5))

            helper_utils.std_flush("[%s] -- Local caching for %s" %
                                   (helper_utils.readable_time(), "News"))
            for tuple_cell_ in news_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["News"] = (float(tuple_cell_[1]),
                                              float(tuple_cell_[1] * 3))

            helper_utils.std_flush(
                "[%s] -- Local cache score total generation" %
                helper_utils.readable_time())
            for _cell_ in cell_cache:
                cell_cache[_cell_]["total"] = sum([
                    cell_cache[_cell_][item][1] for item in cell_cache[_cell_]
                ])

            pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
            r = redis.Redis(connection_pool=pool)
            helper_utils.std_flush("[%s] -- Connected to Redis" %
                                   helper_utils.readable_time())

            # Key versioning -- results live under either v1 or v2
            # list_tracker_key tracks which version is currently active
            # list_push_key holds the list of cells for that version
            # list_info_key holds the per-cell score details
            list_tracker_key = "assed:event:detection:multisource:listkey"
            list_push_key = "assed:event:detection:multisource:list"
            list_info_key = "assed:event:detection:multisource:info"
            key_version = r.get(list_tracker_key)
            if key_version is None:
                key_version = "v2"
            else:
                key_version = key_version.decode()
            push_key = 'v1'
            if key_version == 'v1':
                helper_utils.std_flush(
                    "[%s] -- v1 key already in effect. Pushing to v2" %
                    helper_utils.readable_time())
                push_key = 'v2'
            else:
                helper_utils.std_flush(
                    "[%s] -- v2 key already in effect. Pushing to v1" %
                    helper_utils.readable_time())

            cell_list = [item for item in cell_cache]
            true_list_push_key = list_push_key + ":" + push_key
            helper_utils.std_flush(
                "[%s] -- Deleting existing %s, if any" %
                (helper_utils.readable_time(), true_list_push_key))
            r.delete(true_list_push_key)

            r.lpush(true_list_push_key, *cell_list)
            helper_utils.std_flush(
                "[%s] -- Pushed cell list to %s" %
                (helper_utils.readable_time(), true_list_push_key))

            helper_utils.std_flush("[%s] -- Pushing individual cell results" %
                                   helper_utils.readable_time())
            cell_counter = 0
            for _cell_ in cell_cache:
                cell_push_contents = json.dumps(cell_cache[_cell_])
                cell_specific_suffix = ":".join(_cell_.split("_"))
                cell_push_key = ":".join(
                    [list_info_key, cell_specific_suffix, push_key])
                r.set(cell_push_key, cell_push_contents)
                if cell_counter == 0:
                    helper_utils.std_flush("[%s] -- First push: %s --- %s" %
                                           (helper_utils.readable_time(),
                                            cell_push_key, cell_push_contents))
                cell_counter += 1

            helper_utils.std_flush(
                "[%s] -- Completed individual cell pushes with %s cells" %
                (helper_utils.readable_time(), str(cell_counter)))

            r.set(list_tracker_key, push_key)
            helper_utils.std_flush(
                "[%s] -- Setting versioning in %s to %s" %
                (helper_utils.readable_time(), list_tracker_key, push_key))

            helper_utils.std_flush("--------   COMPLETE AT  %s ----------\n" %
                                   helper_utils.readable_time())
        else:
            #helper_utils.std_flush("Sleeping for %s"%sleep_timer)
            time.sleep(sleep_timer)
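
A consumer-side sketch (not part of the original script) showing how the versioned keys written above might be read back; it assumes the same key names and the underscore-to-colon cell suffix used in the push loop:

import json
import redis

r = redis.Redis(host='localhost', port=6379, db=0)
version = (r.get("assed:event:detection:multisource:listkey") or b"v1").decode()
cells = [c.decode() for c in
         r.lrange("assed:event:detection:multisource:list:" + version, 0, -1)]
if cells:
    suffix = ":".join(cells[0].split("_"))
    info_key = ":".join(["assed:event:detection:multisource:info", suffix, version])
    scores = json.loads(r.get(info_key))  # per-source (count, score) pairs plus "total"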
Example #9
    def fetch_data(self):

        connection = get_db_connection()
        if connection:

            # Load the views in an order.

            # 1. tp_bid
            q1 = "SELECT * FROM V_TP20_BID where NVP_BID_NR = '%s' " % self.bid_number
            df_tp20_bid = pd.read_sql(q1, con=connection)

            # 2. tp_bid_shpr
            q2 = "SELECT * FROM V_TP20_BID_SHPR_INFO WHERE NVP_BID_NR = '%s' " % self.bid_number
            df_tp20_bid_shpr = pd.read_sql(q2, con=connection).sort_values(
                by=['NVP_BID_NR', 'SHR_AC_NR'], ascending=[1, 0])

            # 3. tp_svc_grp
            q3 = "SELECT * FROM V_TP20_SERVICE_GROUP WHERE NVP_BID_NR = '%s' " % self.bid_number
            df_tp20_svc_grp = pd.read_sql(q3, con=connection)

            # 4. tp_ceiling_svc
            q4 = "SELECT * FROM V_TP20_CEILING_SERVICES WHERE NVP_BID_NR = '%s' " % self.bid_number
            df_tp20_ceiling_svc = pd.read_sql(q4, con=connection)

            # 5. tp20_shpr_svc
            q5 = "SELECT * FROM V_TP20_SHPR_SVC_VOL_REV WHERE NVP_BID_NR = '%s'" % self.bid_number
            df_tp20_shpr_svc = pd.read_sql(q5, con=connection)
            # TODO: Filter out the columns not required.

            # 6. ttpsvgp
            q6 = "SELECT NVP_BID_NR, SVC_GRP_NR, PND_STS_CD, CPE_ETM_RPP_A, SVC_GRP_TRG_PSE_A, SVC_TRG_LOW_RNG_A, " \
                 "SVC_TRG_HI_RNG_A, TRG_PSE_FCR_NR FROM TTPSVGP WHERE NVP_BID_NR = '%s' " % self.bid_number
            df_ttpsvgp = pd.read_sql(q6, con=connection)

            # 7. zone_weight
            q7 = "SELECT NVP_BID_NR,SVC_GRP_NR,SVC_GRP_SUF_NR,DEL_ZN_NR,WGT_MS_UNT_TYP_CD, " \
                 "WGT_CGY_WGY_QY, PKGBOL," \
                 "(CASE WGT_MS_UNT_TYP_CD WHEN 'OZ' " \
                 "THEN cast(WGT_CGY_WGY_QY as DECIMAL(9,2)) / 16.0 " \
                 "ELSE cast(WGT_CGY_WGY_QY as DECIMAL(9,2)) END) " \
                 "as WEIGHT FROM V_TP20_ZONE_WGT_VOL_DIST WHERE NVP_BID_NR = '%s' AND DEL_ZN_NR != 'ALL'" % self.bid_number
            #
            df_zone_weight = pd.read_sql(q7, con=connection)

            # tncvcel
            q8 = "SELECT DISTINCT C.*, D.MVM_DRC_CD, D.SVC_TYP_CD, D.SVC_FEA_TYP_CD FROM " \
                 "(SELECT A.* FROM " \
                 "(SELECT NVP_BID_NR, SVC_GRP_NR, RCM_NCV_QY, NCV_MIN_QY, NCV_MAX_QY FROM TNCVCEL " \
                 "WHERE NVP_BID_NR = '%s') A " \
                 "INNER JOIN V_TP20_CEILING_SERVICES B " \
                 "ON A.NVP_BID_NR = B.NVP_BID_NR AND A.SVC_GRP_NR = B.SVC_GRP_NR) C " \
                 "INNER JOIN V_TP20_SERVICE_GROUP D ON C.NVP_BID_NR = D.NVP_BID_NR AND C.SVC_GRP_NR = D.SVC_GRP_NR" % self.bid_number
            df_tncvcel = pd.read_sql(q8, con=connection)

            # tp_accessorial
            q9 = "SELECT * FROM V_TP20_ACCESSORIAL WHERE NVP_BID_NR = '%s' " % self.bid_number
            df_tp_accessorial = pd.read_sql(q9, con=connection)

            import joblib
            import tempfile
            import os
            outfile = os.path.join(tempfile.gettempdir(),
                                   '%s.data' % self.bid_number)
            joblib.dump([
                df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp,
                df_tp20_ceiling_svc, df_tp20_shpr_svc, df_ttpsvgp,
                df_zone_weight, df_tp_accessorial
            ],
                        outfile,
                        compress=5)
            if (self.validate_data(df_tp20_bid, df_tp20_bid_shpr,
                                   df_tp20_svc_grp, df_tp20_ceiling_svc,
                                   df_tp20_shpr_svc, df_ttpsvgp,
                                   df_zone_weight, df_tp_accessorial)):
                return df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp, df_tp20_ceiling_svc, df_tp20_shpr_svc, df_ttpsvgp, df_zone_weight, df_tp_accessorial
            else:
                self.log.critical(
                    'Data validation for the bid number %s failed.' %
                    (self.bid_number))
        else:
            self.log.critical('Unable to access the database.')
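
The joblib dump above can be reloaded later, for example to debug a bid offline; a minimal sketch, assuming the same temp-file naming convention (the bid number shown is a placeholder):

import os
import tempfile
import joblib

bid_number = "1234567"  # hypothetical bid number
outfile = os.path.join(tempfile.gettempdir(), '%s.data' % bid_number)
(df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp, df_tp20_ceiling_svc,
 df_tp20_shpr_svc, df_ttpsvgp, df_zone_weight, df_tp_accessorial) = joblib.load(outfile)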