Example #1
0
def waitForMoreRowsUsingSQL(SOS, sos_host, sos_port, prior_frame_max):
    """Block until FRAME_INTERVAL new frames beyond prior_frame_max arrive.

    Polls the SOS daemon once per second via checkLatestFrameUsingSQL and
    returns the latest observed frame count once enough new data exists.
    """
    max_frame = checkLatestFrameUsingSQL(SOS, sos_host, sos_port,
                                         prior_frame_max)
    log(2, "Waiting for %d frames of new data..." % FRAME_INTERVAL)
    print("waiting for data...", flush=True)
    # ggout
    target_frame = prior_frame_max + FRAME_INTERVAL
    while True:
        if max_frame >= target_frame:
            break
        time.sleep(1)  # ggout ggin
        max_frame = checkLatestFrameUsingSQL(SOS, sos_host, sos_port,
                                             prior_frame_max)
    log(2, "Enough frames have arrived at SOS.  max_frame == %d" % max_frame)
    return max_frame
Example #2
0
def waitForMoreRows(SOS, sos_host, sos_port, prior_frame_max):
    """Block until FRAME_INTERVAL new frames beyond prior_frame_max are
    visible in the SOS pub manifest; return the new maximum frame.

    In ONCE_THEN_EXIT mode no waiting is performed and prior_frame_max
    is returned unchanged.
    """
    if ONCE_THEN_EXIT:
        log(2, "Using ONCE_THEN_EXIT mode, not waiting for new rows.")
        return prior_frame_max

    log(2, "Waiting for data...")

    target_frame = prior_frame_max + FRAME_INTERVAL
    max_frame, results, col_names = SOS.request_pub_manifest(
        "", sos_host, sos_port)

    # Poll once per second until the manifest shows enough new frames.
    while max_frame < target_frame:
        time.sleep(1)
        max_frame, results, col_names = SOS.request_pub_manifest(
            "", sos_host, sos_port)

    #####
    return max_frame
Example #3
0
def main():
    """Controller main loop.

    Waits for new SOS training data, snapshots it to pickle files, trains
    decision-tree models, and fires model-distribution triggers, repeating
    until the SOS_SHUTDOWN environment variable is set to "TRUE" (or after
    one full cycle in ONCE_THEN_EXIT mode).
    """
    controller_start = time.time()
    SOS = SSOS()
    SOS.init()

    sos_host = "localhost"
    sos_port = os.environ.get("SOS_CMD_PORT")

    step = 0
    prior_frame_max = 0

    log(1, "Online.")
    print("CREATE VIEW", flush=True)  # ggout
    query.createApolloView(SOS, sos_host, sos_port)

    #log(1, "Wiping all prior data in the SOS database...")
    print("WIPE DATA", flush=True)  # ggout
    query.wipeAllExistingData(SOS, sos_host, sos_port)
    prev_dtree_def = None

    triggers = 0
    while (os.environ.get("SOS_SHUTDOWN") != "TRUE"):
        # Clearing prior training data
        # query.wipeTrainingData(SOS, sos_host, sos_port, prior_frame_max)
        prior_frame_max = query.waitForMoreRowsUsingSQL(
            SOS, sos_host, sos_port, prior_frame_max)
        data, region_names = query.getTrainingData(SOS,
                                                   sos_host,
                                                   sos_port,
                                                   row_limit=0)
        dataset_guid = SOS.get_guid()
        # Snapshot the training inputs so a run can be replayed offline.
        data.to_pickle("./output/models/step.%d.trainingdata.pickle" %
                       prior_frame_max)
        with open(
            ("./output/models/step.%d.region_names.pickle" % prior_frame_max),
                "wb") as f:
            pickle.dump(region_names, f)
        print("Pickled step ", prior_frame_max)  # ggout
        # BUGFIX: a stray debugging `continue` here made everything below
        # unreachable -- no models were ever generated and `step` never
        # advanced. It has been removed.

        # Model: DecisionTree
        dtree_def, dtree_skl = trees.generateDecisionTree(
            log,
            data,
            assign_guid=dataset_guid,
            tree_max_depth=3,
            one_big_tree=False)
        dtree_len = len(dtree_def)

        if dtree_len < 1:
            log(0, "No models generated for step %d." % prior_frame_max)
        else:
            with open(("./output/models/step.%d.model.json" % prior_frame_max),
                      "w") as f:
                f.write(dtree_def)

        if True:  #prev_dtree_def == None or prev_dtree_def != dtree_def:
            prev_dtree_def = dtree_def
            #SOS.trigger("APOLLO_MODELS", dtree_len, dtree_def)
            triggers += 1
            print("===> Trigger ",
                  triggers,
                  " because models differ",
                  flush=True)  # ggout

        # Model: RegressionTree
        #rtree_skl = trees.generateRegressionTree(log, data,
        #        assign_guid=dataset_guid,
        #        tree_max_depth=3,
        #        one_big_tree=False)

        # TODO(chad): Add NN models / streaming models here

        # TODO(chad): Drop models into an arena to fight, and only send models
        #             out when they are better than some prior model for any
        #             given loop. Could use async queues for analysis and for
        #             model distribution.

        if dtree_len > 0:
            if ONCE_THEN_EXIT:
                controller_elapsed = time.time() - controller_start
                # BUGFIX: added the missing space before "seconds".
                log(
                    1, "Done.  Full cycle of controller took " +
                    str(controller_elapsed) + " seconds.")
                return
        else:
            if VERBOSE:
                log(1, "NOTICE: Model was not generated, nothing to send.")
            if ONCE_THEN_EXIT:
                log(1, "Done.")
                return

        step += 1
        ##### return to top of loop until shut down #####

    ########## end of controller.py  ##########
    log(1, "Done.")
    return
Example #4
0
def getTrainingData(SOS, sos_host, sos_port, row_limit=0):
    """Fetch Apollo training data from the SOS daemon.

    Runs two SQL queries: one for the distinct region names (appended to
    the model encoding downstream), and one for the aggregated timing
    view. The per-region splitting for training happens later in pandas,
    so a single view query is sufficient here.

    Args:
        SOS: connected SSOS client instance.
        sos_host: hostname of the SOS daemon.
        sos_port: port of the SOS daemon.
        row_limit: maximum rows to fetch from the view; values < 1 mean
            no limit.

    Returns:
        (data, region_names): a pandas DataFrame built from the
        viewApollo aggregate query, and a list of region-name strings.
    """
    log(2, "Submitting SQL queries ...")
    # Distinct region names, pulled straight from the value tables --
    # faster than the old "SELECT DISTINCT region_name FROM viewApollo".
    sql_string = """\
            SELECT DISTINCT(tblVals.val)
            FROM tblVals
                LEFT OUTER JOIN tblData ON tblVals.guid = tblData.guid
            WHERE tblData.name LIKE "region_name"
                AND tblVals.val IS NOT NULL;
            """
    names_start = time.time()
    region_name_list, col_names = SOS.query(sql_string, sos_host, sos_port)
    names_elapsed = time.time() - names_start
    log(2, "Apollo::Region list (" + str(len(region_name_list))
        + " x " + str(len(col_names)) + ") retrieved in "
        + str(names_elapsed) + " seconds.")
    region_names = [str(nm[0]) for nm in region_name_list]

    ###
    #
    #  Now get the full training set: best (minimum) time_avg per
    #  region / num_elements / step.
    #
    #  NOTE: the statement is deliberately left WITHOUT a trailing ';'
    #  so the optional LIMIT clause can be appended before the final
    #  semicolon. (BUGFIX: previously the literal already ended in ';',
    #  so appending "LIMIT n;" produced invalid SQL.)
    #
    sql_string = """\
            SELECT
                region_name, policy_index, step, num_elements, MIN(time_avg) AS time_avg
            FROM
                viewApollo
            GROUP BY
                region_name, num_elements, step
            """

    if row_limit < 1:
        sql_string += ";"
    else:
        sql_string += "LIMIT " + str(row_limit) + ";"

    view_start = time.time()
    results, col_names = SOS.query(sql_string, sos_host, sos_port)
    view_elapsed = time.time() - view_start
    # BUGFIX: this log previously reported names_elapsed (copy-paste
    # error) instead of the view query's own elapsed time.
    log(2, "viewApollo data (" + str(len(results))
        + " x " + str(len(col_names)) + ") retrieved in "
        + str(view_elapsed) + " seconds.")

    convert_start = time.time()
    data = pd.DataFrame.from_records(results, columns=col_names)
    convert_elapsed = time.time() - convert_start
    log(2, "Converted to DataFrame in " + str(convert_elapsed) + " seconds.")

    return data, region_names
Example #5
0
def main():
    """Controller main loop.

    Waits for new SOS data, trains regression and decision trees over it,
    analyzes performance against the existing model, and repeats until the
    SOS_SHUTDOWN environment variable is set to "TRUE" (or after one full
    cycle in ONCE_THEN_EXIT mode).
    """
    controller_start = time.time()
    SOS = SSOS()
    SOS.init()

    sos_host = "localhost"
    sos_port = os.environ.get("SOS_CMD_PORT")

    step = 0
    prior_frame_max = 0

    log(1, "Online.")
    query.createApolloView(SOS, sos_host, sos_port)

    #log(1, "Wiping all prior data in the SOS database...")
    #query.wipeAllExistingData(SOS, sos_host, sos_port)

    # Shared payload dict handed to the tree builders and the analyzer.
    data = {}

    while (os.environ.get("SOS_SHUTDOWN") != "TRUE"):
        # Clearing prior training data from SOS
        # query.wipeTrainingData(SOS, sos_host, sos_port, prior_frame_max)
        # BUGFIX: previously only data['prior_frame_max'] was assigned, so
        # the local prior_frame_max stayed 0 and the wait window never
        # advanced between iterations.
        prior_frame_max = query.waitForMoreRowsUsingSQL(
            SOS, sos_host, sos_port, prior_frame_max)
        data['prior_frame_max'] = prior_frame_max
        data['latest_query_rows'], data['latest_region_names'] = \
                query.getTrainingData(SOS, sos_host, sos_port, row_limit=0)

        # BUGFIX: was pickle_latest_data(cargo) -- `cargo` was undefined
        # (NameError on the first iteration); the payload dict is `data`.
        pickle_latest_data(data)
        dataset_guid = SOS.get_guid()

        # Model: RegressionTree
        data['rtree_skl'] = trees.generateRegressionTree(
            log,
            data,
            assign_guid=dataset_guid,
            tree_max_depth=3,
            one_big_tree=False)

        # Model: DecisionTree
        data['dtree_def'], data['dtree_skl'] = \
                trees.generateDecisionTree(
                        log, data, assign_guid=dataset_guid,
                        tree_max_depth=3, one_big_tree=False)

        # TODO(chad): Bootstrap conditions VS. active monitoring conditions

        # Analyze the data coming in compared to existing rtree
        data['model_pkg_json'] = guide.analyzePerformance(data)

        # TODO(chad): Ship out the model package.

        if ONCE_THEN_EXIT:
            controller_elapsed = time.time() - controller_start
            # BUGFIX: added the missing space before "seconds".
            log(1, "Done.  Full cycle of controller took "
                + str(controller_elapsed) + " seconds.")
            return

        step += 1
        ##### return to top of loop until shut down #####

    ########## end of controller.py  ##########
    log(1, "Done.")
    return