def test_custom_transform_server(
    self, resources, framework, problem, language, docker, tmp_path, use_arrow,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # do predictions
        files = {"X": open(input_dataset)}
        if use_arrow:
            files["arrow_version"] = ".2"

        response = requests.post(run.url_server_address + "/transform/", files=files)
        print(response.text)
        assert response.ok

        in_data = pd.read_csv(input_dataset)

        if framework == SKLEARN_TRANSFORM_DENSE:
            if use_arrow:
                transformed_out = read_arrow_payload(eval(response.text))
                assert eval(response.text)["out.format"] == "arrow"
            else:
                transformed_out = read_csv_payload(eval(response.text))
                assert eval(response.text)["out.format"] == "csv"
            actual_num_predictions = transformed_out.shape[0]
        else:
            transformed_out = read_mtx_payload(eval(response.text))
            actual_num_predictions = transformed_out.shape[0]
            assert eval(response.text)["out.format"] == "sparse"

        validate_transformed_output(
            transformed_out, should_be_sparse=framework == SKLEARN_TRANSFORM
        )
        assert in_data.shape[0] == actual_num_predictions
def test_custom_transforms_with_drum_nginx_prediction_server(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
        nginx=True,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # do predictions
        response = requests.post(
            run.url_server_address + "/transform/", files={"X": open(input_dataset)}
        )
        assert response.ok

        in_data = pd.read_csv(input_dataset)
        parsed_response = parse_multi_part_response(response)
        transformed_mat = read_mtx_payload(parsed_response, X_TRANSFORM_KEY)
        actual_num_predictions = transformed_mat.shape[0]
        assert in_data.shape[0] == actual_num_predictions
def check_prediction_side_effects(self):
    """Score the full input and a random sample of it against a local DRUM server,
    and verify that the overlapping rows produce the same results within tolerance,
    i.e. that scoring has no randomness or row-order side effects."""
    rtol = 2e-02
    atol = 1e-06

    input_extension = os.path.splitext(self.options.input)
    is_sparse = input_extension[1] == ".mtx"

    # Sample roughly 10% of the rows (between 10 and 1000) and write them to a temp file.
    if is_sparse:
        df = pd.DataFrame(mmread(self.options.input).tocsr())
        samplesize = min(1000, max(int(len(df) * 0.1), 10))
        data_subset = df.sample(n=samplesize, random_state=42)
        _, __tempfile_sample = mkstemp(suffix=".mtx")
        sparse_mat = vstack(x[0] for x in data_subset.values)
        mmwrite(__tempfile_sample, sparse_mat)
    else:
        df = pd.read_csv(self.options.input)
        samplesize = min(1000, max(int(len(df) * 0.1), 10))
        data_subset = df.sample(n=samplesize, random_state=42)
        _, __tempfile_sample = mkstemp(suffix=".csv")
        data_subset.to_csv(__tempfile_sample, index=False)

    if self.target_type == TargetType.BINARY:
        labels = [self.options.negative_class_label, self.options.positive_class_label]
    elif self.target_type == TargetType.MULTICLASS:
        labels = self.options.class_labels
    else:
        labels = None

    with DrumServerRun(
        self.target_type.value,
        labels,
        self.options.code_dir,
    ) as run:
        response_key = (
            X_TRANSFORM_KEY
            if self.target_type == TargetType.TRANSFORM
            else RESPONSE_PREDICTIONS_KEY
        )
        endpoint = "/transform/" if self.target_type == TargetType.TRANSFORM else "/predict/"

        response_full = requests.post(
            run.url_server_address + endpoint, files={"X": open(self.options.input)}
        )
        response_sample = requests.post(
            run.url_server_address + endpoint, files={"X": open(__tempfile_sample)}
        )

        if self.target_type == TargetType.TRANSFORM:
            if is_sparse:
                preds_full = pd.DataFrame(read_mtx_payload(eval(response_full.text)))
                preds_sample = pd.DataFrame(read_mtx_payload(eval(response_sample.text)))
            else:
                preds_full = read_csv_payload(eval(response_full.text))
                preds_sample = read_csv_payload(eval(response_sample.text))
        else:
            preds_full = pd.DataFrame(json.loads(response_full.text)[response_key])
            preds_sample = pd.DataFrame(json.loads(response_sample.text)[response_key])

        # Compare the sampled rows of the full run against the sample-only run.
        preds_full_subset = preds_full.iloc[data_subset.index]
        matches = np.isclose(preds_full_subset, preds_sample, rtol=rtol, atol=atol)
        if not np.all(matches):
            message = """
                Error: Your predictions were different when we tried to predict twice.
                No randomness is allowed.
                The first 10 mismatching predictions from the main predict run were: {}
                However, when we reran predictions on the same data, we got: {}.
                The sample used to calculate prediction reruns can be found in this file: {}""".format(
                preds_full_subset[~matches][:10], preds_sample[~matches][:10], __tempfile_sample
            )
            raise ValueError(message)
        else:
            os.remove(__tempfile_sample)
def test_custom_transform_server(
    self, resources, framework, problem, language, docker, tmp_path, use_arrow, pass_target,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        input_dataset = resources.datasets(framework, problem)
        in_data = pd.read_csv(input_dataset)

        files = {"X": open(input_dataset)}
        if pass_target:
            target_dataset = resources.targets(problem)
            files["y"] = open(target_dataset)
        if use_arrow:
            files["arrow_version"] = ".2"

        response = requests.post(run.url_server_address + "/transform/", files=files)
        assert response.ok

        parsed_response = parse_multi_part_response(response)

        if framework == SKLEARN_TRANSFORM_DENSE:
            if use_arrow:
                transformed_out = read_arrow_payload(parsed_response, X_TRANSFORM_KEY)
                if pass_target:
                    target_out = read_arrow_payload(parsed_response, Y_TRANSFORM_KEY)
                assert parsed_response["X.format"] == "arrow"
                if pass_target:
                    assert parsed_response["y.format"] == "arrow"
            else:
                transformed_out = read_csv_payload(parsed_response, X_TRANSFORM_KEY)
                if pass_target:
                    target_out = read_csv_payload(parsed_response, Y_TRANSFORM_KEY)
                assert parsed_response["X.format"] == "csv"
                if pass_target:
                    assert parsed_response["y.format"] == "csv"
            actual_num_predictions = transformed_out.shape[0]
        else:
            transformed_out = read_mtx_payload(parsed_response, X_TRANSFORM_KEY)
            colnames = parsed_response["X.colnames"].decode("utf-8").split("\n")
            assert len(colnames) == transformed_out.shape[1]
            if pass_target:
                # the transformed target shouldn't be sparse even though the features are
                if use_arrow:
                    target_out = read_arrow_payload(parsed_response, Y_TRANSFORM_KEY)
                    assert parsed_response["y.format"] == "arrow"
                else:
                    target_out = read_csv_payload(parsed_response, Y_TRANSFORM_KEY)
                    assert parsed_response["y.format"] == "csv"
            actual_num_predictions = transformed_out.shape[0]
            assert parsed_response["X.format"] == "sparse"

        validate_transformed_output(
            transformed_out, should_be_sparse=framework == SKLEARN_TRANSFORM
        )
        if pass_target:
            # the target should come back from the transform unchanged
            assert (pd.read_csv(target_dataset) == target_out).all().all()
        assert in_data.shape[0] == actual_num_predictions
def load_transform_output(response, is_sparse, request_key):
    parsed_response = parse_multi_part_response(response)
    if is_sparse:
        return pd.DataFrame(read_mtx_payload(parsed_response, request_key))
    else:
        return pd.DataFrame(read_csv_payload(parsed_response, request_key))
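# Illustrative sketch only (not part of the original code): one way load_transform_output
# might be called against a running DRUM transform server. The `run` server handle,
# `input_dataset`, and X_TRANSFORM_KEY are assumed to come from the surrounding
# fixtures/helpers shown above.
#
#     response = requests.post(
#         run.url_server_address + "/transform/", files={"X": open(input_dataset)}
#     )
#     transformed = load_transform_output(response, is_sparse=False, request_key=X_TRANSFORM_KEY)
#     assert transformed.shape[0] == pd.read_csv(input_dataset).shape[0]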