def test_model_isolation(self):
    """Load two models that share an entrypoint and check they stay separate.

    Two dummy models are packaged (with the values 1.0 and 2.0) into their
    own temporary directories, both are loaded at the same time in this
    process, and each must return its own value for the same input.
    """
    with TemporaryDirectory() as first_dir, TemporaryDirectory() as second_dir:
        first_path = self.package_dummy_model(first_dir, 1.0)
        second_path = self.package_dummy_model(second_dir, 2.0)

        with load_neuropod(first_path, _always_use_native=False) as first_model:
            with load_neuropod(second_path, _always_use_native=False) as second_model:
                input_data = {"x": np.array([0], dtype=np.float32)}

                # Same input for both models; only the expected output differs.
                for model, expected in ((first_model, 1.0), (second_model, 2.0)):
                    self.assertEqual(model.infer(input_data)["out"][0], expected)
def test_python_deps(self):
    """Load two models that have the same dependencies side by side.

    The same sklearn model is packaged twice into separate temporary
    directories; both copies are loaded together and must each produce
    (approximately) 17 for the shared input.
    """
    with TemporaryDirectory() as first_dir, TemporaryDirectory() as second_dir:
        first_path = self.package_sklearn_model(first_dir)
        second_path = self.package_sklearn_model(second_dir)

        with load_neuropod(first_path) as first_model:
            with load_neuropod(second_path) as second_model:
                input_data = {"x": np.array([[4, 5]], dtype=np.float64)}

                for model in (first_model, second_model):
                    self.assertAlmostEqual(model.infer(input_data)["out"][0], 17)
def load_and_test_neuropod(
    neuropod_path,
    test_input_data,
    test_expected_out=None,
    neuropod_load_args=None,
    **kwargs
):
    """
    Loads a neuropod in a new process and verifies that inference runs.
    If expected output is specified, the output of the model is checked against
    the expected values.

    :param  neuropod_path:      Path of the neuropod to load.
    :param  test_input_data:    Input passed to `infer`.
    :param  test_expected_out:  Optional expected output. When it is `None`
                                the output is not checked.
    :param  neuropod_load_args: Optional dict of extra keyword arguments for
                                loading the model. `None` (the default) means
                                "no extra arguments".
    :param  kwargs:             Accepted for call-site compatibility; not used
                                by this function.

    Raises a ValueError if the outputs don't match the expected values
    """
    # A `None` sentinel replaces the former `{}` default so that no dict
    # object is shared between calls.
    load_args = {} if neuropod_load_args is None else neuropod_load_args

    if RUN_NATIVE_TESTS:
        # Load the model using native out-of-process execution
        # NOTE(review): the model is not explicitly closed here; other callers
        # use `with load_neuropod(...)` -- confirm whether that is wanted here.
        model = load_neuropod(neuropod_path, **load_args)
        out = model.infer(test_input_data)
    else:
        # Run the evaluation in a new process. This is important to make sure
        # custom ops are being tested correctly
        args = load_args.copy()

        # Work on the copy so the caller's dict is left untouched, and
        # explicitly switch `_always_use_native` off for this code path.
        args["_always_use_native"] = False
        out = eval_in_new_process(neuropod_path, test_input_data, neuropod_load_args=args)

    # Check the output
    if test_expected_out is not None:
        # Throws a ValueError if the output doesn't match the expected value
        check_output_matches_expected(out, test_expected_out)
def test_invalid_shape(self):
    """A 1-D array for `in_float32_matrix` must be rejected with a dimension error."""
    expected_message = "in the input spec is expected to have 2 dimensions, but had 1"
    accepted_errors = (ValueError, RuntimeError)

    with six.assertRaisesRegex(self, accepted_errors, expected_message):
        # Loading stays inside the context manager, exactly as before, so an
        # error raised while loading is matched against the pattern too.
        model = load_neuropod(TestSpecValidation.neuropod_path)
        one_dimensional = np.asarray([3], dtype=np.float32)
        model.infer({"in_float32_matrix": one_dimensional})
def test_invalid_input_name(self):
    """Passing an input under the key "bogus" must raise a spec-lookup error."""
    expected_message = "are not found in the input spec"
    accepted_errors = (ValueError, RuntimeError)

    with six.assertRaisesRegex(self, accepted_errors, expected_message):
        model = load_neuropod(TestSpecValidation.neuropod_path)
        matrix = np.asarray([[1.1, 2.2], [0, 1], [2, 3]], dtype=np.float32)
        model.infer({"bogus": matrix})
def test_no_inputs(self):
    """Inference with an empty input dict must still return non-empty outputs."""
    model = load_neuropod(TestSpecValidation.neuropod_path)
    outputs = model.infer({})

    string_rows = outputs["out_string_vector"].shape[0]
    self.assertGreater(string_rows, 0)

    # The string vector and the int matrix must agree on their first dimension.
    self.assertEqual(string_rows, outputs["out_int_matrix"].shape[0])

    float_rows = outputs["out_float_matrix"].shape[0]
    self.assertGreater(float_rows, 0)
def test_some_inputs(self):
    """Inference with only `in_float32_matrix` supplied must return a non-empty string vector."""
    model = load_neuropod(TestSpecValidation.neuropod_path)

    float_matrix = np.asarray([[1.1, 2.2], [0, 1], [2, 3]], dtype=np.float32)
    outputs = model.infer({"in_float32_matrix": float_matrix})

    self.assertGreater(outputs["out_string_vector"].shape[0], 0)
def check_strings_model(neuropod_path):
    """
    Load the neuropod at `neuropod_path` and verify that its input and output
    specs match the string concat model spec.
    """
    with load_neuropod(neuropod_path, _always_use_native=False) as model:
        expected_spec = get_string_concat_model_spec()

        # Compare inputs first, then outputs, against the target spec.
        for attribute, spec_key in (("inputs", "input_spec"), ("outputs", "output_spec")):
            check_specs_match(getattr(model, attribute), expected_spec[spec_key])
def test_stateful_model(self):
    """Package and run the accumulator model with both `init_op` forms.

    `init_op` can be passed a list of strings or a string, so the model is
    packaged once per form. Each time, the loaded model must report its name
    and platform and return 2.0 and then 6.0 for the inputs 2.0 and 4.0.
    """
    for as_list in (False, True):
        with TemporaryDirectory() as scratch_dir:
            model_path = os.path.join(scratch_dir, "test_neuropod")
            self.package_accumulator_model(model_path, as_list)

            model = load_neuropod(model_path)
            np.testing.assert_equal(model.name, "accumulator_model")
            np.testing.assert_equal(model.platform, "tensorflow")

            # Two consecutive calls on the same loaded model.
            for value, expected_out in ((2.0, 2.0), (4.0, 6.0)):
                np.testing.assert_equal(
                    model.infer({"x": np.float32(value)}), {"out": expected_out}
                )
def check_addition_model(neuropod_path):
    """
    Load the neuropod at `neuropod_path` and verify it against the addition
    model spec.

    Checks the input and output specs, the model name and that the platform
    field is set. Raises a ValueError when the name is unexpected or the
    platform is missing.
    """
    with load_neuropod(neuropod_path) as model:
        expected_spec = get_addition_model_spec()

        # Compare inputs first, then outputs, against the target spec.
        for attribute, spec_key in (("inputs", "input_spec"), ("outputs", "output_spec")):
            check_specs_match(getattr(model, attribute), expected_spec[spec_key])

        expected_name = "addition_model"
        name_matches = model.name == expected_name
        if not name_matches:
            raise ValueError(
                "Expected model name '{}'. Got '{}'".format(expected_name, model.name)
            )

        if not model.platform:
            raise ValueError("Expected the platform field to be set")
def t_neuropod(csv_filename):
    """Train a Ludwig model, export it as a neuropod and compare predictions.

    The predictions of the exported neuropod are asserted to match those of
    the in-memory Ludwig model for every output feature whose prediction,
    probability or probabilities column is present on both sides.
    """

    def _as_padded_array(values):
        # When any element is a list, zero-pad the rows to a common length and
        # return them as an array; otherwise hand the values back untouched.
        if any(isinstance(el, list) for el in values):
            return np.array(list(itertools.zip_longest(*values, fillvalue=0))).T
        return values

    #######
    # Setup
    #######
    dir_path = os.path.dirname(csv_filename)
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')
    audio_dest_folder = os.path.join(os.getcwd(), 'generated_audio')

    input_features = [
        binary_feature(),
        numerical_feature(),
        category_feature(vocab_size=3),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
        vector_feature(),
        image_feature(image_dest_folder),
        audio_feature(audio_dest_folder),
        timeseries_feature(),
        date_feature(),
        h3_feature(),
        set_feature(vocab_size=3),
        bag_feature(vocab_size=3),
    ]

    output_features = [
        binary_feature(),
        numerical_feature(),
        category_feature(vocab_size=3),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
        set_feature(vocab_size=3),
        vector_feature()
    ]

    # Generate test data
    data_csv_path = generate_data(input_features, output_features, csv_filename)

    #############
    # Train model
    #############
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {'epochs': 2}
    }
    ludwig_model = LudwigModel(model_definition)
    ludwig_model.train(
        data_csv=data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )
    original_predictions_df = ludwig_model.predict(data_csv=data_csv_path)

    ###################
    # save Ludwig model
    ###################
    ludwigmodel_path = os.path.join(dir_path, 'ludwigmodel')
    shutil.rmtree(ludwigmodel_path, ignore_errors=True)
    ludwig_model.save(ludwigmodel_path)

    ################
    # build neuropod
    ################
    neuropod_path = os.path.join(dir_path, 'neuropod')
    export_neuropod(ludwigmodel_path, neuropod_path=neuropod_path)

    ########################
    # predict using neuropod
    ########################
    data_df = pd.read_csv(data_csv_path)

    # Every input column is fed as a column vector of strings.
    if_dict = {}
    for feature in input_features:
        column = feature['name']
        as_strings = np.array([str(x) for x in data_df[column].tolist()], dtype='str')
        if_dict[column] = np.expand_dims(as_strings, 1)

    from neuropod.loader import load_neuropod
    neuropod_model = load_neuropod(neuropod_path)
    preds = neuropod_model.infer(if_dict)

    for key in list(preds):
        preds[key] = np.squeeze(preds[key])

    #########
    # cleanup
    #########
    # Delete the temporary data created
    for path in (ludwigmodel_path, neuropod_path, image_dest_folder, audio_dest_folder):
        if not os.path.exists(path):
            continue
        if os.path.isfile(path):
            os.remove(path)
        else:
            shutil.rmtree(path, ignore_errors=True)

    ########
    # checks
    ########
    for output_feature in output_features:
        output_feature_name = output_feature['name']
        output_feature_type = output_feature['type']

        predictions_key = output_feature_name + "_predictions"
        probability_key = output_feature_name + "_probability"
        probabilities_key = output_feature_name + "_probabilities"

        if predictions_key in preds and predictions_key in original_predictions_df:
            neuropod_pred = preds[predictions_key].tolist()
            if output_feature_type == BINARY:
                neuropod_pred = [str2bool(x) for x in neuropod_pred]
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_pred = [x.split() for x in neuropod_pred]

            original_pred = original_predictions_df[predictions_key].tolist()

            assert neuropod_pred == original_pred

        if probability_key in preds and probability_key in original_predictions_df:
            neuropod_prob = preds[probability_key].tolist()
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_prob = [[float(n) for n in x.split()] for x in neuropod_prob]
            neuropod_prob = _as_padded_array(neuropod_prob)

            original_prob = _as_padded_array(
                original_predictions_df[probability_key].tolist()
            )

            assert np.isclose(neuropod_prob, original_prob).all()

        if probabilities_key in preds and probabilities_key in original_predictions_df:
            neuropod_prob = preds[probabilities_key].tolist()
            original_prob = original_predictions_df[probabilities_key].tolist()

            assert np.isclose(neuropod_prob, original_prob).all()
def test_neuropod(csv_filename):
    """Train a Ludwig model in a temp dir, export it as a neuropod and compare.

    All generated data, the saved Ludwig model and the exported neuropod live
    under one temporary directory. The neuropod predictions are asserted to
    match those of the in-memory Ludwig model for every output feature whose
    prediction, probability or probabilities column exists on both sides.
    """

    def _as_padded_array(values):
        # When any element is a list, zero-pad the rows to a common length and
        # return them as an array; otherwise hand the values back untouched.
        if any(isinstance(el, list) for el in values):
            return np.array(list(itertools.zip_longest(*values, fillvalue=0))).T
        return values

    #######
    # Setup
    #######
    # NOTE(review): the whole test body is assumed to run inside this
    # TemporaryDirectory block -- confirm against the upstream layout.
    with tempfile.TemporaryDirectory() as tmpdir:
        dir_path = tmpdir
        data_csv_path = os.path.join(tmpdir, csv_filename)
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        audio_dest_folder = os.path.join(tmpdir, "generated_audio")

        input_features = [
            binary_feature(),
            numerical_feature(),
            category_feature(vocab_size=3),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            vector_feature(),
            image_feature(image_dest_folder),
            audio_feature(audio_dest_folder),
            timeseries_feature(),
            date_feature(),
            h3_feature(),
            set_feature(vocab_size=3),
            bag_feature(vocab_size=3),
        ]

        output_features = [
            binary_feature(),
            numerical_feature(),
            category_feature(vocab_size=3),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            set_feature(vocab_size=3),
            vector_feature(),
        ]

        # Generate test data
        data_csv_path = generate_data(input_features, output_features, data_csv_path)

        #############
        # Train model
        #############
        config = {
            "input_features": input_features,
            "output_features": output_features,
            "training": {"epochs": 2},
        }
        ludwig_model = LudwigModel(config, backend=LocalTestBackend())
        ludwig_model.train(
            dataset=data_csv_path,
            skip_save_training_description=True,
            skip_save_training_statistics=True,
            skip_save_progress=True,
            skip_save_log=True,
            skip_save_processed_input=True,
            output_directory=dir_path,
        )

        data_df = pd.read_csv(data_csv_path)
        original_predictions_df, _ = ludwig_model.predict(dataset=data_df)

        ###################
        # save Ludwig model
        ###################
        ludwigmodel_path = os.path.join(dir_path, "ludwigmodel")
        shutil.rmtree(ludwigmodel_path, ignore_errors=True)
        ludwig_model.save(ludwigmodel_path)

        ################
        # build neuropod
        ################
        neuropod_path = os.path.join(dir_path, "neuropod")
        shutil.rmtree(neuropod_path, ignore_errors=True)
        export_neuropod(ludwigmodel_path, neuropod_path=neuropod_path, entrypoint="get_test_model")

        ########################
        # predict using neuropod
        ########################
        # Every input column is fed as a column vector of strings.
        if_dict = {}
        for feature in input_features:
            column = feature["name"]
            as_strings = np.array([str(x) for x in data_df[column].tolist()], dtype="str")
            if_dict[column] = np.expand_dims(as_strings, 1)

        from neuropod.loader import load_neuropod

        neuropod_model = load_neuropod(neuropod_path, _always_use_native=False)
        preds = neuropod_model.infer(if_dict)

        for key in list(preds):
            preds[key] = np.squeeze(preds[key])

        #########
        # cleanup
        #########
        # Delete the temporary data created
        for path in (ludwigmodel_path, neuropod_path, image_dest_folder, audio_dest_folder):
            if not os.path.exists(path):
                continue
            if os.path.isfile(path):
                os.remove(path)
            else:
                shutil.rmtree(path, ignore_errors=True)

        ########
        # checks
        ########
        for output_feature in output_features:
            output_feature_name = output_feature["name"]
            output_feature_type = output_feature["type"]

            predictions_key = output_feature_name + "_predictions"
            probability_key = output_feature_name + "_probability"
            probabilities_key = output_feature_name + "_probabilities"

            if predictions_key in preds and predictions_key in original_predictions_df:
                neuropod_pred = preds[predictions_key].tolist()
                if output_feature_type == BINARY:
                    neuropod_pred = list(map(str2bool, neuropod_pred))
                if output_feature_type in {SEQUENCE, TEXT, SET}:
                    neuropod_pred = list(map(lambda x: x.split(), neuropod_pred))

                original_pred = original_predictions_df[predictions_key].tolist()

                assert neuropod_pred == original_pred

            if probability_key in preds and probability_key in original_predictions_df:
                neuropod_prob = preds[probability_key].tolist()
                if output_feature_type in {SEQUENCE, TEXT, SET}:
                    neuropod_prob = [
                        list(map(float, x.split())) for x in neuropod_prob
                    ]
                neuropod_prob = _as_padded_array(neuropod_prob)

                original_prob = _as_padded_array(
                    original_predictions_df[probability_key].tolist()
                )

                assert np.allclose(neuropod_prob, original_prob)

            if probabilities_key in preds and probabilities_key in original_predictions_df:
                neuropod_prob = preds[probabilities_key].tolist()
                original_prob = original_predictions_df[probabilities_key].tolist()

                assert np.allclose(neuropod_prob, original_prob)
def test_neuropod(csv_filename):
    """Train a single-sequence-input Ludwig model, export it and compare.

    The model has one sequence input and several outputs. The predictions of
    the exported neuropod are asserted to match those of the in-memory Ludwig
    model for every output feature whose prediction, probability or
    probabilities column is present on both sides.
    """

    def _as_padded_array(values):
        # When any element is a list, zero-pad the rows to a common length and
        # return them as an array; otherwise hand the values back untouched.
        if any(isinstance(el, list) for el in values):
            return np.array(list(itertools.zip_longest(*values, fillvalue=0))).T
        return values

    #######
    # Setup
    #######
    dir_path = os.path.dirname(csv_filename)

    # NOTE: this list is never read anywhere in this function.
    output_feature_options = []

    # Single sequence input, multiple outputs
    sf = sequence_feature()
    input_features = [sf]
    input_feature_name = input_features[0]['name']

    output_features = [
        binary_feature(),
        numerical_feature(),
        category_feature(vocab_size=3),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
        set_feature(vocab_size=3),
        vector_feature()
    ]

    # Generate test data
    data_csv_path = generate_data(input_features, output_features, csv_filename)

    #############
    # Train model
    #############
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {'epochs': 2}
    }
    ludwig_model = LudwigModel(model_definition)
    ludwig_model.train(
        data_csv=data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )
    original_predictions_df = ludwig_model.predict(data_csv=data_csv_path)

    ###################
    # save Ludwig model
    ###################
    ludwigmodel_path = os.path.join(dir_path, 'ludwigmodel')
    shutil.rmtree(ludwigmodel_path, ignore_errors=True)
    ludwig_model.save(ludwigmodel_path)

    ################
    # build neuropod
    ################
    neuropod_path = os.path.join(dir_path, 'neuropod')
    export_neuropod(ludwigmodel_path, neuropod_path=neuropod_path)

    ########################
    # predict using neuropod
    ########################
    data_df = pd.read_csv(data_csv_path)
    if_vals = data_df[input_feature_name].tolist()

    from neuropod.loader import load_neuropod
    neuropod_model = load_neuropod(neuropod_path)
    neuropod_inputs = {input_feature_name: np.array(if_vals, dtype='str')}
    preds = neuropod_model.infer(neuropod_inputs)

    #########
    # cleanup
    #########
    for path in (ludwigmodel_path, neuropod_path):
        if not os.path.exists(path):
            continue
        if os.path.isfile(path):
            os.remove(path)
        else:
            shutil.rmtree(path, ignore_errors=True)

    ########
    # checks
    ########
    for output_feature in output_features:
        output_feature_name = output_feature['name']
        output_feature_type = output_feature['type']

        predictions_key = output_feature_name + "_predictions"
        probability_key = output_feature_name + "_probability"
        probabilities_key = output_feature_name + "_probabilities"

        if predictions_key in preds and predictions_key in original_predictions_df:
            neuropod_pred = preds[predictions_key].tolist()
            if output_feature_type == BINARY:
                neuropod_pred = [str2bool(x) for x in neuropod_pred]
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_pred = [x.split() for x in neuropod_pred]

            original_pred = original_predictions_df[predictions_key].tolist()

            assert neuropod_pred == original_pred

        if probability_key in preds and probability_key in original_predictions_df:
            neuropod_prob = preds[probability_key].tolist()
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_prob = [[float(n) for n in x.split()] for x in neuropod_prob]
            neuropod_prob = _as_padded_array(neuropod_prob)

            original_prob = _as_padded_array(
                original_predictions_df[probability_key].tolist()
            )

            assert np.isclose(neuropod_prob, original_prob).all()

        if probabilities_key in preds and probabilities_key in original_predictions_df:
            neuropod_prob = preds[probabilities_key].tolist()
            original_prob = original_predictions_df[probabilities_key].tolist()

            assert np.isclose(neuropod_prob, original_prob).all()
def test_neuropod_torchscript(csv_filename, tmpdir):
    """Export a trained Ludwig model as a neuropod and compare its outputs.

    A model with binary (string-valued), number and category features is
    trained on generated data. Its predictions are then compared with the
    outputs of the neuropod exported from the same model: string outputs
    must match exactly, other outputs within `np.allclose` tolerance.

    :param csv_filename: File name used for the generated dataset inside
        `tmpdir`.
    :param tmpdir: Directory that receives the dataset and the exported
        neuropod.
    """
    data_csv_path = os.path.join(tmpdir, csv_filename)

    # Configure features to be tested:
    bin_str_feature = binary_feature()
    input_features = [
        bin_str_feature,
        # binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
        # TODO: future support
        # sequence_feature(vocab_size=3),
        # text_feature(vocab_size=3),
        # vector_feature(),
        # image_feature(image_dest_folder),
        # audio_feature(audio_dest_folder),
        # timeseries_feature(),
        # date_feature(),
        # h3_feature(),
        # set_feature(vocab_size=3),
        # bag_feature(vocab_size=3),
    ]
    output_features = [
        bin_str_feature,
        # binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
        # TODO: future support
        # sequence_feature(vocab_size=3),
        # text_feature(vocab_size=3),
        # set_feature(vocab_size=3),
        # vector_feature()
    ]
    backend = LocalTestBackend()
    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 2},
    }

    # Generate training data
    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)

    # Convert bool values to strings, e.g., {'Yes', 'No'}
    df = pd.read_csv(training_data_csv_path)
    false_value, true_value = "No", "Yes"
    df[bin_str_feature[NAME]] = df[bin_str_feature[NAME]].map(
        lambda x: true_value if x else false_value
    )
    df.to_csv(training_data_csv_path)

    # Train Ludwig (Pythonic) model:
    ludwig_model = LudwigModel(config, backend=backend)
    ludwig_model.train(
        dataset=training_data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )

    # Obtain predictions from Python model
    preds_dict, _ = ludwig_model.predict(dataset=training_data_csv_path, return_type=dict)

    # Create graph inference model (Torchscript) from trained Ludwig model.
    neuropod_path = os.path.join(tmpdir, "neuropod")
    export_neuropod(ludwig_model, neuropod_path)

    from neuropod.loader import load_neuropod

    neuropod_module = load_neuropod(neuropod_path)

    def to_input(s: pd.Series) -> np.ndarray:
        # Object columns are passed on as arrays of their values; everything
        # else is converted to float32. Both branches return NumPy arrays,
        # which is what the return annotation now states.
        if s.dtype == "object":
            return np.array(s.to_list())
        return s.to_numpy().astype(np.float32)

    df = pd.read_csv(training_data_csv_path)
    inputs = {
        name: to_input(df[feature.column])
        for name, feature in ludwig_model.model.input_features.items()
    }
    outputs = neuropod_module.infer(inputs)

    # Compare results from Python trained model against Neuropod
    assert len(preds_dict) == len(outputs)
    for feature_name, feature_outputs_expected in preds_dict.items():
        assert feature_name in outputs

        output_values_expected = feature_outputs_expected[PREDICTIONS]
        output_values = outputs[feature_name]
        # `np.bytes_` is used instead of `np.string_`: the latter was removed
        # in NumPy 2.0, and on NumPy 1.x both names refer to the same type.
        if output_values.dtype.type in {np.bytes_, np.str_}:
            # Strings should match exactly
            assert np.all(
                output_values == output_values_expected
            ), f"feature: {feature_name}, output: predictions"
        else:
            assert np.allclose(
                output_values, output_values_expected
            ), f"feature: {feature_name}, output: predictions"