def infer_loop(model, input, output_file, stats_interval, conf_thresh, conf_percent):
    """Run predictions over every sample from `input` until end of input.

    Each prediction is appended to `output_file` (one per line) and a
    per-digit prediction histogram is printed every `stats_interval`
    predictions.  When the input stream is exhausted (EOFError), the
    accumulated distribution is reported to MLOps as a table and a bar
    graph and the MLOps session is shut down.

    :param model:          fitted estimator exposing `predict()`
    :param input:          stream exposing `get_next_input()`; raises
                           EOFError when exhausted
    :param output_file:    path the predictions are written to
    :param stats_interval: number of predictions between stat printouts
    :param conf_thresh:    unused; kept for interface compatibility
    :param conf_percent:   unused; kept for interface compatibility
    """
    output = open(output_file, "w")

    # Prediction-distribution statistics
    total_predictions = 0
    categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    prediction_hist = [0] * len(categories)

    ### MLOPS start
    # Create a bar graph and table for reporting prediction distributions
    # and set the column names
    infer_bar = BarGraph().name("Prediction Distribution Bar Graph").cols(categories)
    infer_tbl = Table().name("Prediction Distribution Table").cols(categories)
    ### MLOPS end

    while True:
        try:
            sample, label = input.get_next_input()
            sample_np = ny.array(sample).reshape(1, -1)

            # The prediction is the class with the highest probability
            prediction = model.predict(sample_np)

            # Append the prediction to the output file
            output.write("{}\n".format(prediction))

            # Calculate statistics.  BUG FIX: the original used ny.int(),
            # an alias deprecated in NumPy 1.20 and removed in 1.24; the
            # builtin int() is the correct cast.
            total_predictions += 1
            prediction_hist[int(prediction[0])] += 1

            # Report statistics
            if total_predictions % stats_interval == 0:
                # Report the prediction distribution
                for category, count in zip(categories, prediction_hist):
                    print("category: {} predictions: {}".format(category, count))

                ### MLOPS start
                # Show the prediction distribution as a table
                infer_tbl.add_row(str(total_predictions), prediction_hist)

                # Show the prediction distribution as a bar graph
                infer_bar.data(prediction_hist)
        except EOFError:  # stop when we hit end of input
            # Report the stats
            mlops.set_stat(infer_tbl)
            mlops.set_stat(infer_bar)
            ### MLOPS end
            output.close()
            ### MLOPS start
            mlops.done()
            ### MLOPS end
            break
def infer_loop(model, input, output_file, stats_interval, conf_tracker):
    """Run inference over every sample from `input` until end of input.

    Writes one prediction per line to `output_file`, updates a prediction
    histogram, feeds each prediction's confidence to `conf_tracker`, and
    reports distribution stats to MLOps every `stats_interval` predictions.

    :param model:          model exposing `infer()` (per-class probability
                           vector) and `get_num_categories()`
    :param input:          stream exposing `get_next_input()`; raises
                           EOFError when exhausted
    :param output_file:    path the predictions are written to
    :param stats_interval: number of predictions between reports
    :param conf_tracker:   ConfidenceTracker used for confidence stats

    Cleanup: removed the unused `low_confidence_predictions` local —
    confidence accounting lives entirely in `conf_tracker`.
    """
    output = open(output_file, "w")

    # Prediction-distribution statistics
    total_predictions = 0
    categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    # NOTE(review): the histogram is sized from the model while the column
    # labels are hard-coded digits — assumes get_num_categories() == 10;
    # confirm for non-MNIST models.
    prediction_hist = [0] * model.get_num_categories()

    ### MLOPS start
    # Create a bar graph and table for reporting prediction distributions
    # and set the column names
    infer_bar = BarGraph().name("Prediction Distribution Bar Graph").cols(categories)
    infer_tbl = Table().name("Prediction Distribution Table").cols(categories)
    ### MLOPS end

    while True:
        try:
            sample, label = input.get_next_input()

            # Get the inference. This is an array of probabilities for
            # each output value.
            inference = model.infer(sample)

            # The prediction is the class with the highest probability;
            # its probability, as a percentage, is the confidence
            prediction = ny.argmax(inference)
            confidence = inference[prediction] * 100

            # Append the prediction to the output file
            output.write("{}\n".format(prediction))

            # Calculate statistics
            total_predictions += 1
            prediction_hist[prediction] += 1
            conf_tracker.check_confidence(confidence, sample)

            # Report statistics
            if total_predictions % stats_interval == 0:
                # Report the prediction distribution
                for i in range(model.get_num_categories()):
                    print("category: {} predictions: {}".format(categories[i], prediction_hist[i]))

                ### MLOPS start
                # Update total prediction count with the all new
                # predictions since we last reported
                mlops.set_stat(PredefinedStats.PREDICTIONS_COUNT, stats_interval)

                # Show the prediction distribution as a table
                infer_tbl.add_row(str(total_predictions), prediction_hist)

                # Show the prediction distribution as a bar graph
                infer_bar.data(prediction_hist)

                # Report the stats
                mlops.set_stat(infer_tbl)
                mlops.set_stat(infer_bar)
                ### MLOPS end

                conf_tracker.report_confidence(stats_interval)
        except EOFError:  # stop when we hit end of input
            print("Reached end of input")
            output.close()
            break
class ConfidenceTracker(object):
    """Tracks prediction confidences in 10%-wide bins, optionally dumps
    low-confidence sample images to disk, and reports the confidence
    distribution (plus a health alert) to MLOps each reporting interval.
    """

    def __init__(self, track_conf, conf_thresh, conf_percent, output):
        """
        :param track_conf:   non-zero enables confidence tracking
        :param conf_thresh:  confidence (%) below which a prediction
                             counts as low-confidence
        :param conf_percent: low-confidence percentage above which a
                             health alert is raised
        :param output:       non-zero enables saving low-confidence
                             sample images to /opt/data-lake
        """
        self._track_conf = track_conf
        self._conf_thresh = conf_thresh
        self._conf_percent = conf_percent
        self._output_low_confidence_predictions = output
        self._low_confidence_predictions = 0
        # Running count of saved images; used to give each file a unique
        # name (the original referenced undefined globals for this).
        self._saved_images = 0
        print("track_conf: {}".format(track_conf))
        if track_conf > 0:
            print("conf_thresh: {}".format(conf_thresh))
            print("conf_percent: {}".format(conf_percent))
            categories = ["10", "20", "30", "40", "50", "60", "70", "80", "90", "100"]
            # One bin per 10% of confidence
            self._conf_hist = [0] * 10
            ## MLOps start
            self._conf_graph = BarGraph().name("Confidence Distribution Bar Graph").cols(categories)
            ## MLOps end

    def check_confidence(self, confidence, sample):
        """Record one prediction's confidence (a percentage in [0, 100]).

        Increments the matching histogram bin; if the confidence is below
        the threshold, counts it and optionally saves the sample image.
        """
        if self._track_conf == 0:
            return
        conf_bin = int(math.floor(confidence / 10))
        # include 100% confidence in the 90-100 range
        if conf_bin == 10:
            conf_bin = 9
        self._conf_hist[conf_bin] += 1
        if confidence < self._conf_thresh:
            self._low_confidence_predictions += 1
            if self._output_low_confidence_predictions != 0:
                self._save_low_confidence_image(sample, confidence)

    def _save_low_confidence_image(self, sample, confidence):
        """Save `sample` (28x28 image data) as a PNG for later review.

        BUG FIX: the original formatted the filename with undefined
        globals `total_predictions` and `prediction` (NameError whenever
        image saving was enabled); an instance counter is used instead.
        Also removed an unused `tensorflow` import and the unused
        `tf.reshape` result.
        """
        import matplotlib
        matplotlib.use('Agg')  # headless backend; no display needed
        import matplotlib.pyplot as plt
        self._saved_images += 1
        plot_data = sample.reshape(28, 28)
        plt.gray()  # use this line if you don't want to see it in color
        plt.imshow(plot_data)
        plt.savefig("/opt/data-lake/image{}_conf{}.png".format(
            self._saved_images, int(round(confidence))))

    def report_confidence(self, total_predictions):
        """Report the confidence distribution for the interval just ended.

        :param total_predictions: number of predictions in this interval
        """
        if self._track_conf == 0:
            return
        ## MLOps start
        # Show the confidence distribution as a bar graph
        self._conf_graph.data(self._conf_hist)
        mlops.set_stat(self._conf_graph)
        ## MLOps end
        # Percentage of low confidence predictions in this reporting interval
        low_conf_percent = self._low_confidence_predictions * 100.0 / total_predictions
        print("low confidence predictions: {} ({})%".format(
            self._low_confidence_predictions, low_conf_percent))
        if low_conf_percent > self._conf_percent:
            msg = "Low confidence: {}% of inferences had confidence below {}%".format(
                low_conf_percent, self._conf_thresh)
            print(msg)
            ## MLOps start
            mlops.health_alert("Low confidence alert", msg)
            ## MLOps end
        # Reset counters for the next interval.  BUG FIX: the original
        # used range(0, 9), which never reset the 90-100% bin, so that
        # bin accumulated across intervals.
        for i in range(10):
            self._conf_hist[i] = 0
        self._low_confidence_predictions = 0
class CategoricalStatistics(InferenceStatistics):
    """Collects per-category label/prediction histograms and accuracy,
    and periodically reports them to MLOps as a table and bar graph.
    """

    def __init__(self, print_interval, stats_type, num_categories, conf_thresh,
                 conf_percent, hot_label=True):
        """
        :param print_interval:  predictions between reports
        :param stats_type:      "python" (agent), "file" (stand-alone) or
                                anything else (stats disabled, "none")
        :param num_categories:  number of output classes
        :param conf_thresh:     low-confidence threshold, given in percent
        :param conf_percent:    low-confidence percentage that triggers a
                                health alert
        :param hot_label:       labels arrive one-hot encoded
        """
        super(CategoricalStatistics, self).__init__(print_interval)
        self._num_categories = num_categories
        self._hot_label = hot_label
        self._stats_type = stats_type
        self._conf_thresh = conf_thresh / 100.0  # convert percent -> fraction
        self._conf_percent = conf_percent

        # These are useful for development, but should be replaced by
        # mlops library functions
        self._label_hist = [0] * self._num_categories
        self._infer_hist = [0] * self._num_categories

        if self._stats_type == "python":
            mlops.init(ctx=None, connect_mlops=True, mlops_mode=MLOpsMode.AGENT)
        elif self._stats_type == "file":
            mlops.init(ctx=None, connect_mlops=False, mlops_mode=MLOpsMode.STAND_ALONE)
        else:
            self._stats_type = "none"

        if self._stats_type != "none":
            # NOTE(review): column labels are hard-coded to 10 digits —
            # assumes num_categories == 10; confirm for non-MNIST use.
            column_names = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
            self._infer_tbl = Table().name("categories").cols(column_names)
            self._infer_bar = BarGraph().name("categories bar").cols(column_names)

    def infer_stats(self, sample, label, inference):
        """Record one inference result; returns the predicted class index.

        :param sample:    input sample (unused here, kept for interface)
        :param label:     ground-truth label or None; one-hot if hot_label
        :param inference: batch of probability vectors (only [0] is used)
        """
        # for now, we only process 1 inference at a time
        inference = inference[0]
        prediction = ny.argmax(inference)
        confidence = inference[prediction]
        if confidence < self._conf_thresh:
            self.increment_low_conf()

        self._infer_hist[prediction] += 1
        if label is not None:
            if self._hot_label:
                label = ny.argmax(label)
            self._label_hist[label] += 1
            if prediction == label:
                self.increment_correct()

        self.increment_total()
        if self.is_time_to_report():
            self.report_stats()

        return prediction

    def report_stats(self):
        """Print interval stats, push them to MLOps, then reset counters."""
        # What percentage of the predictions had confidences less than
        # the threshold
        low_conf_percent = self.get_low_conf() * 100.0 / self.get_report_interval()

        if low_conf_percent > self._conf_percent:
            mlops.health_alert(
                "Low confidence alert",
                "{}% of inferences had confidence below {}%".format(
                    low_conf_percent, self._conf_thresh * 100))

        for i in range(self._num_categories):
            print(i, "label_total =", self._label_hist[i],
                  "infer_total = ", self._infer_hist[i])
        print("total = ", self.get_total(), "total_correct = ", self.get_correct())

        # Generalized from a hard-coded 10-element list so the report
        # matches num_categories.
        category_data = list(self._infer_hist)

        if self._stats_type != "none":
            # BUG FIX: add_row()/data() were called before this guard in
            # the original, but _infer_tbl/_infer_bar only exist when
            # stats are enabled -> AttributeError for stats_type "none".
            self._infer_tbl.add_row(str(self.get_cum_total()), category_data)
            self._infer_bar.data(category_data)
            mlops.set_stat("correct_percent",
                           self.get_correct() * 100.0 / self.get_total())
            mlops.set_stat(self._infer_tbl)
            mlops.set_stat(self._infer_bar)
            # Update total prediction count with the all new predictions
            # since we last reported.
            mlops.set_stat(PredefinedStats.PREDICTIONS_COUNT,
                           self.get_report_interval())

        print("Completed {} predictions".format(self.get_report_interval()))
        self.reset()

    def __del__(self):
        # BUG FIX: only shut mlops down if it was actually initialized
        # (stats_type "none" never called mlops.init).
        if self._stats_type != "none":
            mlops.done()
        # NOTE(review): assumes InferenceStatistics defines __del__ —
        # confirm against the base class; original made the same call.
        super(CategoricalStatistics, self).__del__()