def waitForMoreRowsUsingSQL(SOS, sos_host, sos_port, prior_frame_max):
    max_frame = checkLatestFrameUsingSQL(SOS, sos_host, sos_port,
                                         prior_frame_max)
    log(2, "Waiting for %d frames of new data..." % FRAME_INTERVAL)
    # Poll once per second until FRAME_INTERVAL new frames have arrived.
    while (max_frame < (prior_frame_max + FRAME_INTERVAL)):
        time.sleep(1)
        max_frame = checkLatestFrameUsingSQL(SOS, sos_host, sos_port,
                                             prior_frame_max)
    log(2, "Enough frames have arrived at SOS. max_frame == %d" % max_frame)
    return max_frame
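
# The polling loop above relies on checkLatestFrameUsingSQL(), which is not
# shown in this section. Below is a minimal sketch of what such a helper
# might look like, assuming the same SOS.query() interface used elsewhere in
# this file and that tblVals carries a "frame" column; treat the body as
# illustrative, not the actual implementation.
def checkLatestFrameUsingSQL(SOS, sos_host, sos_port, prior_frame_max):
    # Ask the SOS daemon for the highest frame index stored so far.
    sql_string = "SELECT MAX(frame) FROM tblVals;"
    results, col_names = SOS.query(sql_string, sos_host, sos_port)
    if (len(results) < 1) or (results[0][0] is None):
        # No data yet; report no progress beyond what the caller knew.
        return prior_frame_max
    return int(results[0][0])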
def waitForMoreRows(SOS, sos_host, sos_port, prior_frame_max):
    if (ONCE_THEN_EXIT):
        log(2, "Using ONCE_THEN_EXIT mode, not waiting for new rows.")
        return prior_frame_max

    log(2, "Waiting for data...")
    max_frame, results, col_names = \
        SOS.request_pub_manifest("", sos_host, sos_port)
    # Poll the publication manifest until enough new frames arrive.
    while (max_frame < (prior_frame_max + FRAME_INTERVAL)):
        time.sleep(1)
        max_frame, results, col_names = \
            SOS.request_pub_manifest("", sos_host, sos_port)
    return max_frame
def main():
    controller_start = time.time()
    SOS = SSOS()
    SOS.init()

    sos_host = "localhost"
    sos_port = os.environ.get("SOS_CMD_PORT")

    step = 0
    prior_frame_max = 0

    log(1, "Online.")
    query.createApolloView(SOS, sos_host, sos_port)

    log(1, "Wiping all prior data in the SOS database...")
    query.wipeAllExistingData(SOS, sos_host, sos_port)

    prev_dtree_def = None
    triggers = 0
    while (os.environ.get("SOS_SHUTDOWN") != "TRUE"):
        # Clearing prior training data:
        # query.wipeTrainingData(SOS, sos_host, sos_port, prior_frame_max)
        prior_frame_max = query.waitForMoreRowsUsingSQL(
            SOS, sos_host, sos_port, prior_frame_max)
        data, region_names = query.getTrainingData(SOS, sos_host, sos_port,
                                                   row_limit=0)

        # Snapshot this step's training data so it can be analyzed offline.
        dataset_guid = SOS.get_guid()
        data.to_pickle("./output/models/step.%d.trainingdata.pickle"
                       % prior_frame_max)
        with open(("./output/models/step.%d.region_names.pickle"
                   % prior_frame_max), "wb") as f:
            pickle.dump(region_names, f)
        log(2, "Pickled step %d." % prior_frame_max)

        # Model: DecisionTree
        dtree_def, dtree_skl = trees.generateDecisionTree(
            log, data, assign_guid=dataset_guid,
            tree_max_depth=3, one_big_tree=False)
        dtree_len = len(dtree_def)
        if (dtree_len < 1):
            log(0, "No models generated for step %d." % prior_frame_max)
        else:
            with open(("./output/models/step.%d.model.json"
                       % prior_frame_max), "w") as f:
                f.write(dtree_def)
            # Only trigger model distribution when the model actually changed.
            if (prev_dtree_def is None) or (prev_dtree_def != dtree_def):
                prev_dtree_def = dtree_def
                SOS.trigger("APOLLO_MODELS", dtree_len, dtree_def)
                triggers += 1
                log(2, "Trigger %d fired because models differ." % triggers)

        # Model: RegressionTree
        # rtree_skl = trees.generateRegressionTree(log, data,
        #                                          assign_guid=dataset_guid,
        #                                          tree_max_depth=3,
        #                                          one_big_tree=False)

        # TODO(chad): Add NN models / streaming models here.
        # TODO(chad): Drop models into an arena to fight, and only send models
        #             out when they are better than some prior model for any
        #             given loop. Could use async queues for analysis and for
        #             model distribution.

        if dtree_len > 0:
            if (ONCE_THEN_EXIT):
                controller_elapsed = time.time() - controller_start
                log(1, "Done. Full cycle of controller took "
                       + str(controller_elapsed) + " seconds.")
                return
        else:
            if (VERBOSE):
                log(1, "NOTICE: Model was not generated, nothing to send.")
            if (ONCE_THEN_EXIT):
                log(1, "Done.")
                return

        step += 1
        ##### return to top of loop until shut down #####

    ########## end of controller.py ##########
    log(1, "Done.")
    return
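
# The pickles written by the loop above can be reloaded later to experiment
# with the model generators offline. A minimal sketch, assuming pandas and
# the output paths used above (step 42 is a placeholder for any pickled step):
#
#     import pickle
#     import pandas as pd
#
#     step_id = 42  # placeholder step number
#     data = pd.read_pickle(
#         "./output/models/step.%d.trainingdata.pickle" % step_id)
#     with open("./output/models/step.%d.region_names.pickle" % step_id,
#               "rb") as f:
#         region_names = pickle.load(f)
#     print(data.shape, len(region_names))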
def getTrainingData(SOS, sos_host, sos_port, row_limit=0):
    #####
    #
    # NOTE: We can split the results up in Pandas to train each region
    #       independently; we don't need to submit that as multiple
    #       queries just yet.
    #
    #       We do grab the list of region names so we can append it to
    #       the encoding of the model, for now.
    #
    log(2, "Submitting SQL queries ...")

    # Old, slow way:
    # sql_string = "SELECT DISTINCT region_name FROM viewApollo
    #               WHERE region_name IS NOT NULL;"
    sql_string = """\
        SELECT DISTINCT(tblVals.val) FROM tblVals
            LEFT OUTER JOIN tblData
                 ON tblVals.guid = tblData.guid
        WHERE tblData.name LIKE "region_name"
          AND tblVals.val IS NOT NULL;
        """
    names_start = time.time()
    region_name_list, col_names = SOS.query(sql_string, sos_host, sos_port)
    names_elapsed = time.time() - names_start
    log(2, "Apollo::Region list (" + str(len(region_name_list))
           + " x " + str(len(col_names)) + ") retrieved in "
           + str(names_elapsed) + " seconds.")
    region_names = []
    for nm in region_name_list:
        region_names.append(str(nm[0]))

    ###
    #
    # Now get the full training set.
    #
    # Example of a faster version of this query:
    #     SELECT region_name, policy_index, MIN(time_avg)
    #     FROM viewApollo2
    #     WHERE step > 90 AND exec_count > 10 AND num_elements > 9
    #     GROUP BY region_name;
    #
    # Unfiltered version:
    # sql_string = "SELECT * FROM viewApollo;"
    #
    # Version that skips the regrid steps:
    # sql_string = """\
    #     SELECT * FROM viewApollo
    #     WHERE ( ((frame + 1) % 10 != 0)
    #         AND (region_name IS NOT NULL) )
    #     """
    #
    # NOTE: The trailing semicolon is appended below, after the optional
    #       LIMIT clause.
    sql_string = """\
        SELECT region_name, policy_index, step, num_elements,
               MIN(time_avg) AS time_avg
        FROM viewApollo
        GROUP BY region_name, num_elements, step
        """
    if (row_limit < 1):
        sql_string += ";"
    else:
        sql_string += "LIMIT " + str(row_limit) + ";"

    view_start = time.time()
    results, col_names = SOS.query(sql_string, sos_host, sos_port)
    view_elapsed = time.time() - view_start
    log(2, "viewApollo data (" + str(len(results))
           + " x " + str(len(col_names)) + ") retrieved in "
           + str(view_elapsed) + " seconds.")

    convert_start = time.time()
    data = pd.DataFrame.from_records(results, columns=col_names)
    convert_elapsed = time.time() - convert_start
    log(2, "Converted to DataFrame in " + str(convert_elapsed) + " seconds.")

    return data, region_names
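
# As the NOTE at the top of getTrainingData() says, the combined result set
# can be split in pandas to train each Apollo::Region independently. A
# minimal sketch of that split, assuming the column names returned by the
# viewApollo query above; splitByRegion() and the commented train-hook are
# hypothetical names, not part of this module.
def splitByRegion(data, region_names):
    # Yield (name, rows) pairs, one per region, for independent training.
    for name in region_names:
        region_rows = data[data["region_name"] == name]
        if region_rows.empty:
            continue
        # train_one_region(name, region_rows)  # hypothetical per-region hook
        yield name, region_rows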
def main():
    controller_start = time.time()
    SOS = SSOS()
    SOS.init()

    sos_host = "localhost"
    sos_port = os.environ.get("SOS_CMD_PORT")

    step = 0
    prior_frame_max = 0

    log(1, "Online.")
    query.createApolloView(SOS, sos_host, sos_port)

    # log(1, "Wiping all prior data in the SOS database...")
    # query.wipeAllExistingData(SOS, sos_host, sos_port)

    data = {}
    while (os.environ.get("SOS_SHUTDOWN") != "TRUE"):
        # Clearing prior training data from SOS:
        # query.wipeTrainingData(SOS, sos_host, sos_port, prior_frame_max)
        prior_frame_max = query.waitForMoreRowsUsingSQL(
            SOS, sos_host, sos_port, prior_frame_max)
        data['prior_frame_max'] = prior_frame_max
        data['latest_query_rows'], data['latest_region_names'] = \
            query.getTrainingData(SOS, sos_host, sos_port, row_limit=0)
        pickle_latest_data(data)

        dataset_guid = SOS.get_guid()

        # Model: RegressionTree
        data['rtree_skl'] = trees.generateRegressionTree(
            log, data,
            assign_guid=dataset_guid,
            tree_max_depth=3,
            one_big_tree=False)

        # Model: DecisionTree
        data['dtree_def'], data['dtree_skl'] = \
            trees.generateDecisionTree(
                log, data,
                assign_guid=dataset_guid,
                tree_max_depth=3,
                one_big_tree=False)

        # TODO(chad): Bootstrap conditions vs. active monitoring conditions.

        # Analyze the incoming data against the existing regression tree.
        data['model_pkg_json'] = guide.analyzePerformance(data)

        # TODO(chad): Ship out the model package.

        if (ONCE_THEN_EXIT):
            controller_elapsed = time.time() - controller_start
            log(1, "Done. Full cycle of controller took "
                   + str(controller_elapsed) + " seconds.")
            return

        step += 1
        ##### return to top of loop until shut down #####

    ########## end of controller.py ##########
    log(1, "Done.")
    return
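
# pickle_latest_data() is called in main() above but not defined in this
# section. A minimal sketch, assuming it mirrors the snapshot logic from the
# earlier version of main() (paths and dictionary keys are taken from this
# file; the body is illustrative, not the actual implementation):
def pickle_latest_data(data):
    step_id = data['prior_frame_max']
    # Snapshot the raw query rows and the region-name list for this step.
    data['latest_query_rows'].to_pickle(
        "./output/models/step.%d.trainingdata.pickle" % step_id)
    with open("./output/models/step.%d.region_names.pickle" % step_id,
              "wb") as f:
        pickle.dump(data['latest_region_names'], f)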