def run_inference(num_observations: int = 1000):
    """Benchmark scikit-learn LogisticRegression training for a batch size.

    Despite the file's other benchmarks, this one fits a LogisticRegression
    model (the previous docstring incorrectly said "xgboost").

    Args:
        num_observations: Number of rows to draw from the shared test data.

    Returns:
        The value of ``common.calculate_stats`` computed over the per-row
        times collected across ``NUM_LOOPS`` runs.
    """
    # Load data
    train_x_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    train_y = common.get_test_data_yc(size=num_observations)
    num_rows = len(train_x_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        # The fit itself is the measured operation; the fitted model is
        # discarded (this benchmark only times the call).
        LogisticRegression().fit(train_x_df, train_y)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept as-is for consistency
        # with the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark scikit-learn DBSCAN clustering for a batch size.

    Fits DBSCAN (eps=0.3, min_samples=10) on ``num_observations`` rows,
    ``NUM_LOOPS`` times, and reports per-row timing stats (the previous
    docstring incorrectly said "xgboost").

    Args:
        num_observations: Number of rows to draw from the shared test data.

    Returns:
        The value of ``common.calculate_stats`` over the per-row times.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        cluster = DBSCAN(eps=0.3, min_samples=10)
        cluster.fit(test_df)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py gradient-boosted-tree regression prediction.

    Runs ``d4p.gbt_regression_prediction`` against the module-level
    ``daal_model`` for ``NUM_LOOPS`` iterations and prints per-row timing
    stats. Note the predictor construction is inside the timed region, so
    its setup cost is included in each measurement.

    Args:
        num_observations: Number of rows to draw from the shared test data.
    """
    # Load data
    test_df = common.get_test_data(num_observations)
    num_rows = len(test_df)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        daal_predict_algo = d4p.gbt_regression_prediction(fptype='float')
        # Result is intentionally discarded; only the elapsed time matters.
        daal_predict_algo.compute(test_df, daal_model)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    print(num_observations, ", ", common.calculate_stats(inference_times))
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py logistic regression prediction for a batch size.

    Uses the module-level ``train_result.model`` and requests class labels
    plus (log-)probabilities (the previous docstring incorrectly said
    "xgboost"). Predictor construction is inside the timed region.

    Args:
        num_observations: Number of rows to draw from the shared test data.

    Returns:
        The value of ``common.calculate_stats`` over the per-row times.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        predict_algo = d4p.logistic_regression_prediction(
            nClasses=2,
            resultsToEvaluate=
            "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
        # Result is intentionally discarded; only the elapsed time matters.
        predict_algo.compute(test_df, train_result.model)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py k-means clustering for a batch size.

    Each timed iteration runs random-dense centroid initialization followed
    by a 5-cluster k-means (maxIterations=100) on the same data (the
    previous docstring incorrectly said "xgboost").

    Args:
        num_observations: Number of rows to draw from the shared test data.

    Returns:
        The value of ``common.calculate_stats`` over the per-row times.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        init_alg = d4p.kmeans_init(nClusters=5, fptype="float",
                                   method="randomDense")
        centroids = init_alg.compute(test_df).centroids
        alg = d4p.kmeans(nClusters=5, maxIterations=100, fptype="float",
                         accuracyThreshold=0, assignFlag=False)
        # Result is intentionally discarded; only the elapsed time matters.
        alg.compute(test_df, centroids)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py decision forest regression training for a batch size.

    Trains a 100-tree decision forest regressor ``NUM_LOOPS`` times and
    reports per-row timing stats (the previous docstring incorrectly said
    "xgboost").

    Args:
        num_observations: Number of rows to draw from the shared train data.

    Returns:
        The value of ``common.calculate_stats`` over the per-row times.
    """
    # Load data
    train_x_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    train_y_df = common.get_test_data_df(X=common.y_df, size=num_observations)
    num_rows = len(train_x_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        # Loop-local algorithm object (previously named MODEL, which reads
        # as a module-level constant); result is intentionally discarded.
        algo = d4p.decision_forest_regression_training(nTrees=100)
        algo.compute(train_x_df, train_y_df)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark scikit-learn KMeans clustering for a batch size.

    Fits a 5-cluster KMeans (extra options come from the module-level
    ``kmeans_kwargs``) ``NUM_LOOPS`` times and reports per-row timing stats
    (the previous docstring incorrectly said "xgboost").

    Args:
        num_observations: Number of rows to draw from the shared test data.

    Returns:
        The value of ``common.calculate_stats`` over the per-row times.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        cluster = KMeans(n_clusters=5, **kmeans_kwargs)
        cluster.fit(test_df)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark XGBoost prediction for a batch size.

    Each timed iteration wraps the test frame in an ``xgb.DMatrix`` and
    runs ``MODEL.predict`` on it, so DMatrix construction is included in
    the measurement.

    Args:
        num_observations: Number of rows to draw from the shared test data.
    """
    # Load data
    test_df = common.get_test_data(num_observations)
    num_rows = len(test_df)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        data = xgb.DMatrix(test_df)
        # Prediction output is intentionally discarded; only timing matters.
        MODEL.predict(data)
        end_time = timer()
        total_time = end_time - start_time
        # NOTE(review): 10e6 is 1e7, not 1e6 — kept for consistency with
        # the other benchmarks in this file; confirm intended units.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    print(num_observations, ", ", common.calculate_stats(inference_times))
# main module
if __name__ == '__main__':
    # Dynamically load the requested benchmark module from the models/
    # package; exit with a nonzero status if it does not exist.
    try:
        logging.info(f"Loading model {args.model}")
        model = importlib.import_module('models.' + args.model)
    except ModuleNotFoundError:
        logging.error(f"Model {args.model} not found.")
        # Previously quit(), which exits with status 0; a missing model is
        # a failure, so signal it to the caller.
        raise SystemExit(1)
    if args.train:
        logging.info(f"Running training benchmark for {args.model}...")
        logging.info(common.get_header())
        logging.info(common.get_underline())
        # Sweep batch sizes 10, 100, 1000, ... up to the requested count.
        batch_size = 10
        while batch_size <= args.observations:
            total_times, observation_times = model.run_training(batch_size)
            stats = common.calculate_stats(observation_times)
            logging.info(common.format_stats(batch_size, stats))
            batch_size *= 10
    # else:
    #     logging.info(f"Running testing benchmark for {args.model}...")
    #     logging.info(common.STATS)
    #     batch_size = 1
    #     while batch_size <= args.observations:
    #         model.run_inference(batch_size)
    #         batch_size *= 10
    else:
        # Inference path above is disabled; report the model name rather
        # than the module object's repr (previous code interpolated {model}).
        logging.error(f"Could not find benchmark for {args.model}")