def test_kubernetes(clipper_conn, num_apps, num_models):
    """Run the Kubernetes integration test.

    Creates ``num_apps`` applications, each exercised against ``num_models``
    models, then dumps the Clipper container logs and cluster state.  On a
    known test failure (BenchmarkException / ClipperException) the Kubernetes
    deployment is torn down and the process exits non-zero.

    Args:
        clipper_conn: An already-connected ClipperConnection backed by the
            Kubernetes container manager.
        num_apps (int): Number of applications to create and test.
        num_models (int): Number of model versions to deploy per application.
    """
    # Give the freshly started cluster a moment to settle before querying it.
    time.sleep(10)
    print(clipper_conn.cm.get_query_addr())
    print(clipper_conn.inspect_instance())
    try:
        # Lazy %-args: the formatting only happens if the record is emitted.
        logger.info("Running integration test with %d apps and %d models",
                    num_apps, num_models)
        for a in range(num_apps):
            create_and_test_app(clipper_conn, "testapp%s" % a, num_models)
        if not os.path.exists(CLIPPER_TEMP_DIR):
            os.makedirs(CLIPPER_TEMP_DIR)
        tmp_log_dir = tempfile.mkdtemp(dir=CLIPPER_TEMP_DIR)
        logger.info(clipper_conn.get_clipper_logs(tmp_log_dir))
        # Remove temp files
        shutil.rmtree(tmp_log_dir)
        log_clipper_state(clipper_conn)
        logger.info("SUCCESS")
    except (BenchmarkException, ClipperException) as e:
        # Both failure types get identical cleanup; type(e).__name__ keeps the
        # original per-exception log message ("BenchmarkException" or
        # "ClipperException") without duplicating the handler body.
        log_clipper_state(clipper_conn)
        logger.exception(type(e).__name__)
        create_kubernetes_connection(
            cleanup=True, start_clipper=False, connect=False)
        sys.exit(1)
'objective': 'binary:logistic' } watchlist = [(dtrain, 'train')] num_round = 2 bst = xgb.train(param, dtrain, num_round, watchlist) def predict(xs): return [str(bst.predict(xgb.DMatrix(xs)))] deploy_and_test_model(clipper_conn, bst, version, predict, link_model=True) except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: clipper_conn = create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception as e: logger.exception("Exception") clipper_conn = create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1)
def _assert_mostly_not_default(clipper_conn, app_name, model_name, version):
    """Query the app 25 times; raise if more than half fall back to default.

    Sleeps 30s first so the newly deployed model container has time to come
    up.  A non-zero default count is logged; only a majority of defaults is
    treated as a failure.

    Raises:
        BenchmarkException: If more than half the predictions were defaults.
    """
    time.sleep(30)
    num_preds = 25
    num_defaults = 0
    addr = clipper_conn.get_query_addr()
    for _ in range(num_preds):
        response = requests.post(
            "http://%s/%s/predict" % (addr, app_name),
            headers=headers,
            data=json.dumps({
                'input': json.dumps((np.random.randint(1000), "spark abcd"))
            }))
        result = response.json()
        if response.status_code == requests.codes.ok and result["default"]:
            num_defaults += 1
    if num_defaults > 0:
        print("Error: %d/%d predictions were default" % (num_defaults,
                                                         num_preds))
    if num_defaults > num_preds / 2:
        raise BenchmarkException("Error querying APP %s, MODEL %s:%d" %
                                 (app_name, model_name, version))


def run_test():
    """End-to-end PySpark deployment test.

    Trains a small text-classification pipeline, registers a Clipper app,
    deploys the model twice (versions 1 and 2), and verifies that queries are
    served by the model rather than the default prediction.  Tears down the
    Docker deployment on both success and failure; exits non-zero on failure.
    """
    spark = SparkSession\
        .builder\
        .appName("clipper-pyspark")\
        .getOrCreate()
    training = spark.createDataFrame(
        [(0, "a b c d e spark", 1.0), (1, "b d", 0.0),
         (2, "spark f g h", 1.0), (3, "hadoop mapreduce", 0.0)],
        columns + ["label"])

    # Configure an ML pipeline, which consists of three stages: tokenizer,
    # hashingTF, and lr.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(
        inputCol=tokenizer.getOutputCol(), outputCol="features")
    lr = LogisticRegression(maxIter=10, regParam=0.001)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])

    # Fit the pipeline to training documents.
    model = pipeline.fit(training)

    # Prepare test documents, which are unlabeled (id, text) tuples.
    test = spark.createDataFrame([(4, "spark i j k"), (5, "l m n"),
                                  (6, "spark hadoop spark"),
                                  (7, "apache hadoop")], columns)

    # Make predictions on test documents and print columns of interest.
    prediction = model.transform(test)
    selected = prediction.select("id", "text", "probability", "prediction")
    # NOTE: loop target renamed from `prediction` to avoid shadowing the
    # DataFrame above.
    for row in selected.collect():
        rid, text, prob, pred = row
        print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob),
                                                       pred))

    # test predict function
    print(
        predict(spark, model,
                [json.dumps((np.random.randint(1000), "spark abcd"))]))

    try:
        clipper_conn = create_docker_connection(
            cleanup=True, start_clipper=True)
        try:
            clipper_conn.register_application(app_name, "strings",
                                              "default_pred", 10000000)
            time.sleep(1)
            addr = clipper_conn.get_query_addr()
            # Sanity-check that the app answers before any model is linked.
            response = requests.post(
                "http://%s/%s/predict" % (addr, app_name),
                headers=headers,
                data=json.dumps({
                    'input': json.dumps((np.random.randint(1000),
                                         "spark abcd"))
                }))
            result = response.json()
            if response.status_code != requests.codes.ok:
                print("Error: %s" % response.text)
                raise BenchmarkException("Error creating app %s" % app_name)

            version = 1
            deploy_pyspark_model(clipper_conn, model_name, version, "strings",
                                 predict, model, spark.sparkContext)
            clipper_conn.link_model_to_app(app_name, model_name)
            _assert_mostly_not_default(clipper_conn, app_name, model_name,
                                       version)

            # Deploy a second version and verify it serves as well.
            version += 1
            deploy_pyspark_model(clipper_conn, model_name, version, "strings",
                                 predict, model, spark.sparkContext)
            _assert_mostly_not_default(clipper_conn, app_name, model_name,
                                       version)
        except BenchmarkException as e:
            log_docker(clipper_conn)
            log_clipper_state(clipper_conn)
            logger.exception("BenchmarkException")
            clipper_conn = create_docker_connection(
                cleanup=True, start_clipper=False)
            sys.exit(1)
        else:
            spark.stop()
            clipper_conn = create_docker_connection(
                cleanup=True, start_clipper=False)
            logger.info("ALL TESTS PASSED")
    except Exception as e:
        log_docker(clipper_conn)
        logger.exception("Exception")
        clipper_conn = create_docker_connection(
            cleanup=True, start_clipper=False)
        sys.exit(1)