Example #1
def compare_perf():
    "compare performance of two models"
    test_file = SCRIPT_DIR / 'rotten_tomatoes_test.csv'
    output_dir = get_ludwig_output_dir()

    model_name = "run"

    experiment_name1 = "rt"
    experiment_dir = experiment_name1 + '_' + model_name
    model_dir1 = output_dir / 'results' / experiment_dir / 'model'

    model1 = LudwigModel.load(model_dir1, backend='local')
    eval_stats1, predictions1, output_dir1 = model1.evaluate(
        dataset=str(test_file))

    experiment_name2 = "rt_zscore"
    experiment_dir = experiment_name2 + '_' + model_name
    model_dir2 = output_dir / 'results' / experiment_dir / 'model'

    model2 = LudwigModel.load(model_dir2, backend='local')
    eval_stats2, predictions2, output_dir2 = model2.evaluate(
        dataset=str(test_file))

    list_of_eval_stats = [eval_stats1, eval_stats2]
    model_names = [experiment_name1, experiment_name2]
    compare_performance(
        list_of_eval_stats,
        "recommended",
        model_names=model_names,
        output_directory=output_dir,
        file_format="png",
    )
    print(f'{output_dir=}')
Example #2
    def predict(self, mode='predict', ignore_columns=None):
        # Avoid the mutable-default-argument pitfall.
        if ignore_columns is None:
            ignore_columns = []
        predict_dataframe, model_definition = self._create_ludwig_dataframe(mode)
        model_definition = self.transaction.hmd['ludwig_data']['model_definition']

        if self.transaction.lmd['model_order_by'] is None:
            timeseries_cols = []
        else:
            timeseries_cols = list(map(lambda x: x[0], self.transaction.lmd['model_order_by']))

        if len(timeseries_cols) > 0:
            predict_dataframe, model_definition = self._translate_df_to_timeseries_format(predict_dataframe, model_definition, timeseries_cols)

        for ignore_col in ignore_columns:
            try:
                predict_dataframe[ignore_col] = [None] * len(predict_dataframe[ignore_col])
            except KeyError:
                # The column may have been expanded into date parts.
                for date_appendage in ['_year', '_month', '_day']:
                    predict_dataframe[ignore_col + date_appendage] = [None] * len(predict_dataframe[ignore_col + date_appendage])

        with disable_ludwig_output():
            model = LudwigModel.load(self.transaction.lmd['ludwig_data']['ludwig_save_path'])
            predictions = model.predict(data_df=predict_dataframe)

        for col_name in predictions:
            col_name_normalized = col_name.replace('_predictions', '')
            predictions = predictions.rename(columns={col_name: col_name_normalized})

        return predictions
Example #3
def predict_test_with_ludwig(self):
    ludwig_model = LudwigModel.load("results/api_experiment_cifar/model")
    predictions = ludwig_model.predict(data_csv="dataset/test.csv")
    predictions.to_csv(index=False,
                       header=True,
                       path_or_buf="predicted.csv")
    print(predictions)
Example #4
def export_triton(model_path,
                  output_path="model_repository",
                  model_name="ludwig_model",
                  model_version=1,
                  **kwargs):
    """Exports a model in torchscript format with config for Triton serving.

    # Inputs

    :param model_path: (str) filepath to pre-trained model.
    :param output_path: (str, default: `'model_repository'`)  directory to store the
        triton models.
    :param model_name: (str, default: `'ludwig_model'`) save triton under this name.
    :param model_version: (int, default: `1`) save the Triton model under this version.

    # Return

    :returns: (`None`)
    """
    logger.info(f"Model path: {model_path}")
    logger.info(f"Output path: {output_path}")
    logger.info(f"Model name: {model_name}")
    logger.info(f"Model version: {model_version}")
    logger.info("\n")

    model = LudwigModel.load(model_path)
    os.makedirs(output_path, exist_ok=True)
    utils_export_triton(model, output_path, model_name, model_version)

    logger.info(f"Saved to: {output_path}")
Example #5
    def predict(self, mode='predict', ignore_columns=None):
        # Avoid the mutable-default-argument pitfall.
        if ignore_columns is None:
            ignore_columns = []
        predict_dataframe, model_definition, timeseries_cols = self._create_ludwig_dataframe(
            mode)
        model_definition = self.transaction.hmd['ludwig_data'][
            'model_definition']

        if len(timeseries_cols) > 0:
            predict_dataframe, model_definition = self._translate_df_to_timeseries_format(
                predict_dataframe, model_definition, timeseries_cols)

        for ignore_col in ignore_columns:
            try:
                predict_dataframe[ignore_col] = [None] * len(
                    predict_dataframe[ignore_col])
            except KeyError:
                # The column may have been expanded into date parts.
                for date_appendage in ['_year', '_month', '_day']:
                    predict_dataframe[ignore_col + date_appendage] = [
                        None
                    ] * len(predict_dataframe[ignore_col + date_appendage])

        with disable_console_output(True):
            model_dir = self.get_model_dir()
            model = LudwigModel.load(model_dir=model_dir)
            predictions = model.predict(data_df=predict_dataframe,
                                        gpus=self.get_useable_gpus())

        for col_name in predictions:
            col_name_normalized = col_name.replace('_predictions', '')
            predictions = predictions.rename(
                columns={col_name: col_name_normalized})

        return predictions
Example #6
def run_api_experiment(input_features, output_features, dataset, **kwargs):
    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2}
    }

    model = LudwigModel(config)
    output_dir = None

    try:
        # Training with csv
        _, _, output_dir = model.train(
            dataset=dataset,
            **kwargs
        )

        model.predict(dataset=dataset)

        # Attempt loading saved model, should broadcast successfully
        model_dir = os.path.join(output_dir, 'model') if output_dir else None
        loaded_model = LudwigModel.load(model_dir)

        # Model loading should broadcast weights from coordinator
        loaded_weights = loaded_model.model.get_weights()
        bcast_weights = hvd.broadcast_object(loaded_weights)
        for loaded, bcast in zip(loaded_weights, bcast_weights):
            assert np.allclose(loaded, bcast)
    finally:
        if output_dir:
            shutil.rmtree(output_dir, ignore_errors=True)
Example #7
def run_api_experiment(input_features, output_features, dataset, **kwargs):
    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "fc_size": 14},
        "training": {"epochs": 2},
    }

    model = LudwigModel(config)
    output_dir = None

    try:
        # Training with csv
        _, _, output_dir = model.train(dataset=dataset, **kwargs)

        model.predict(dataset=dataset)

        # Attempt loading saved model, should broadcast successfully
        model_dir = os.path.join(output_dir, "model") if output_dir else None
        loaded_model = LudwigModel.load(model_dir)

        # Model loading should broadcast weights from coordinator
        loaded_state = loaded_model.model.state_dict()
        bcast_state = hvd.broadcast_object(loaded_state)
        for loaded, bcast in zip(loaded_state.values(), bcast_state.values()):
            assert np.allclose(loaded, bcast)
    finally:
        if output_dir:
            shutil.rmtree(output_dir, ignore_errors=True)
Example #8
def collect_weights(model_path: str, tensors: List[str], output_directory: str = "results", **kwargs) -> List[str]:
    """Loads a pretrained model and collects weights.

    # Inputs
    :param model_path: (str) filepath to pre-trained model.
    :param tensors: (List[str]) list of tensor names whose weights will be
        collected.
    :param output_directory: (str, default: `'results'`) the directory where
        collected weights will be stored.

    # Return

    :return: (List[str]) list of filepath to `*.npy` files containing
        the weights.
    """
    logger.info(f"Model path: {model_path}")
    logger.info(f"Output path: {output_directory}")
    logger.info("\n")

    model = LudwigModel.load(model_path)

    # collect weights
    print_boxed("COLLECT WEIGHTS")
    collected_tensors = model.collect_weights(tensors)

    # saving
    os.makedirs(output_directory, exist_ok=True)
    saved_filenames = save_tensors(collected_tensors, output_directory)

    logger.info(f"Saved to: {output_directory}")
    return saved_filenames
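A minimal usage sketch for collect_weights; the model path and tensor names are assumptions (real names come from the trained model):

import numpy as np

# Hypothetical call: the tensor names are placeholders, e.g. names returned
# by collect_weights() on the loaded model.
filenames = collect_weights("results/experiment_run/model",
                            ["utterance.embeddings", "category_output.weights"],
                            output_directory="collected_weights")
first_weights = np.load(filenames[0])  # each .npy file holds one array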
Example #9
def predict_with_backend(tmpdir,
                         config,
                         data_csv_path,
                         backend,
                         patch_args=None):
    with init_backend(backend):
        if backend == "ray":
            backend = RAY_BACKEND_CONFIG
            backend["processor"]["type"] = "dask"

        ludwig_model = LudwigModel(config, backend=backend)
        _, _, output_directory = ludwig_model.train(
            dataset=data_csv_path,
            output_directory=os.path.join(tmpdir, "output"),
        )
        # Check that metadata JSON saves and loads correctly
        ludwig_model = LudwigModel.load(os.path.join(output_directory,
                                                     "model"))

        if patch_args is not None:
            with mock.patch(*patch_args):
                preds_df, _ = ludwig_model.predict(dataset=data_csv_path)
        else:
            preds_df, _ = ludwig_model.predict(dataset=data_csv_path)

    return preds_df, ludwig_model
Example #10
def test_model_loaded_from_old_config_prediction_works(tmpdir):
    # Titanic model based on 0.5.3.
    old_model_url = "https://predibase-public-us-west-2.s3.us-west-2.amazonaws.com/ludwig_unit_tests/old_model.zip"
    old_model_filename = wget.download(old_model_url, tmpdir)
    with zipfile.ZipFile(old_model_filename, "r") as zip_ref:
        zip_ref.extractall(tmpdir)
    example_data = {
        "PassengerId": 892,
        "Pclass": 3,
        "Name": "Kelly, Mr. James",
        "Sex": "male",
        "Age": 34.5,
        "SibSp": 0,
        "Parch": 0,
        "Ticket": "330911",
        "Fare": 7.8292,
        "Cabin": None,
        "Embarked": "Q",
    }
    test_set = pd.DataFrame(example_data, index=[0])

    ludwig_model = LudwigModel.load(os.path.join(tmpdir, "old_model/model"))
    predictions, _ = ludwig_model.predict(dataset=test_set)

    assert predictions.to_dict()["Survived_predictions"] == {0: False}
Example #11
def test_missing_value_prediction(csv_filename):
    with tempfile.TemporaryDirectory() as tmpdir:
        input_features = [
            category_feature(
                vocab_size=2,
                reduce_input='sum',
                preprocessing=dict(missing_value_strategy='fill_with_mode'))
        ]
        output_features = [binary_feature()]

        dataset = pd.read_csv(
            generate_data(input_features, output_features, csv_filename))

        config = {
            'input_features': input_features,
            'output_features': output_features,
            'combiner': {
                'type': 'concat',
                'fc_size': 14
            },
        }
        model = LudwigModel(config)
        _, _, output_dir = model.train(dataset=dataset,
                                       output_directory=tmpdir)

        # Set the input column to None; missing values should be replaced
        # with the mode from the training set.
        dataset[input_features[0]['name']] = None
        model.predict(dataset=dataset)

        model = LudwigModel.load(os.path.join(output_dir, 'model'))
        model.predict(dataset=dataset)
Example #12
def export_neuropod(model_path,
                    output_path="neuropod",
                    model_name="neuropod",
                    **kwargs):
    """Exports a model to Neuropod.

    # Inputs

    :param model_path: (str) filepath to pre-trained model.
    :param output_path: (str, default: `'neuropod'`)  directory to store the
        neuropod model.
    :param model_name: (str, default: `'neuropod'`) save neuropod under this
        name.

    # Return

    :returns: (`None`)
    """
    logger.info(f"Model path: {model_path}")
    logger.info(f"Output path: {output_path}")
    logger.info("\n")

    model = LudwigModel.load(model_path)
    os.makedirs(output_path, exist_ok=True)
    utils_export_neuropod(model, output_path, model_name)

    logger.info(f"Saved to: {output_path}")
Example #13
def export_torchscript(model_path: str,
                       model_only: bool = False,
                       output_path: str = "torchscript",
                       device: Optional[str] = None,
                       **kwargs) -> None:
    """Exports a model to torchscript.

    # Inputs

    :param model_path: (str) filepath to pre-trained model.
    :param model_only: (bool, default: `False`) If true, scripts and exports the model only.
    :param output_path: (str, default: `'torchscript'`) directory to store the
        torchscript model.
    :param device: (str, default: `None`) device on which the model is scripted.

    # Return
    :returns: (`None`)
    """
    logger.info(f"Model path: {model_path}")
    logger.info(f"Saving model only: {model_only}")
    logger.info(f"Output path: {output_path}")
    logger.info("\n")

    model = LudwigModel.load(model_path)
    os.makedirs(output_path, exist_ok=True)
    model.save_torchscript(output_path, model_only=model_only, device=device)

    logger.info(f"Saved to: {output_path}")
Example #14
def test_collect_weights(csv_filename):
    output_dir = None
    try:
        # This will reset the layer numbering scheme TensorFlow uses.
        # Otherwise, when we load the model, its layer names will be appended
        # with "_1".
        tf.keras.backend.reset_uids()

        model, output_dir = _train(*_prepare_data(csv_filename))
        model_path = os.path.join(output_dir, 'model')
        weights = [w for name, w in model.model.collect_weights()]

        #  1 for the encoder (embeddings),
        #  2 for the decoder classifier (w and b)
        assert len(weights) == 3

        # Load model from disk to ensure correct weight names
        tf.keras.backend.reset_uids()
        model_loaded = LudwigModel.load(model_path)
        tensor_names = [name for name, w in model_loaded.collect_weights()]
        assert len(tensor_names) == 3

        tf.keras.backend.reset_uids()
        with tempfile.TemporaryDirectory() as output_directory:
            filenames = collect_weights(model_path, tensor_names,
                                        output_directory)
            assert len(filenames) == 3

            for weight, filename in zip(weights, filenames):
                saved_weight = np.load(filename)
                assert np.allclose(weight.numpy(), saved_weight,
                                   rtol=1.e-4), filename
    finally:
        if output_dir:
            shutil.rmtree(output_dir, ignore_errors=True)
Example #15
def test_collect_weights(tmpdir, csv_filename):
    output_dir = None
    try:
        model, output_dir = _train(*_prepare_data(csv_filename))
        model_path = os.path.join(output_dir, "model")

        # 1 for the encoder (embeddings).
        # 2 for the decoder classifier (w and b).
        weights = [w for _, w in model.model.collect_weights()]
        assert len(weights) == 3

        # Load model from disk to ensure correct weight names
        model_loaded = LudwigModel.load(model_path)
        tensor_names = [name for name, w in model_loaded.collect_weights()]
        assert len(tensor_names) == 3

        filenames = collect_weights(model_path, tensor_names, tmpdir)
        assert len(filenames) == 3

        for weight, filename in zip(weights, filenames):
            saved_weight = np.load(filename)
            assert torch.allclose(weight,
                                  torch.from_numpy(saved_weight).to(DEVICE),
                                  rtol=1.0e-4), filename
    finally:
        if output_dir:
            shutil.rmtree(output_dir, ignore_errors=True)
Example #16
def test_missing_value_prediction(csv_filename):
    random.seed(1)
    np.random.seed(1)
    with tempfile.TemporaryDirectory() as tmpdir:
        input_features = [
            category_feature(
                vocab_size=2,
                reduce_input="sum",
                preprocessing=dict(missing_value_strategy="fill_with_mode"))
        ]
        output_features = [binary_feature()]

        dataset = pd.read_csv(
            generate_data(input_features, output_features, csv_filename))

        config = {
            "input_features": input_features,
            "output_features": output_features,
            "combiner": {
                "type": "concat",
                "fc_size": 14
            },
        }
        model = LudwigModel(config)
        _, _, output_dir = model.train(dataset=dataset,
                                       output_directory=tmpdir)

        # Set the input column to None; missing values should be replaced
        # with the mode from the training set.
        dataset[input_features[0]["name"]] = None
        model.predict(dataset=dataset)

        model = LudwigModel.load(os.path.join(output_dir, "model"))
        model.predict(dataset=dataset)
Example #17
def predict_cli(model_path,
                dataset=None,
                data_format=None,
                batch_size=128,
                skip_save_unprocessed_output=False,
                skip_save_predictions=False,
                output_directory='results',
                gpus=None,
                gpu_memory_limit=None,
                allow_parallel_threads=True,
                use_horovod=None,
                logging_level=logging.INFO,
                debug=False,
                **kwargs):
    model = LudwigModel.load(model_path,
                             logging_level=logging_level,
                             use_horovod=use_horovod,
                             gpus=gpus,
                             gpu_memory_limit=gpu_memory_limit,
                             allow_parallel_threads=allow_parallel_threads)
    model.predict(
        dataset=dataset,
        data_format=data_format,
        batch_size=batch_size,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_predictions=skip_save_predictions,
        output_directory=output_directory,
        return_type=dict,
        debug=debug,
    )
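A minimal usage sketch for the CLI wrapper above; both paths are illustrative assumptions:

# Hypothetical call: batch predictions on an assumed CSV test set.
predict_cli("results/experiment_run/model",
            dataset="test.csv",
            batch_size=128,
            output_directory="predictions")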
Example #18
def test_tune_batch_size_and_lr(tmpdir):
    with tempfile.TemporaryDirectory() as outdir:
        input_features = [sequence_feature(reduce_output="sum")]
        output_features = [category_feature(vocab_size=2, reduce_input="sum")]

        csv_filename = os.path.join(tmpdir, "training.csv")
        data_csv = generate_data(input_features, output_features, csv_filename)
        val_csv = shutil.copyfile(data_csv,
                                  os.path.join(tmpdir, "validation.csv"))
        test_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "test.csv"))

        config = {
            "input_features": input_features,
            "output_features": output_features,
            "combiner": {
                "type": "concat",
                "output_size": 14
            },
            TRAINER: {
                "epochs": 2,
                "batch_size": "auto",
                "eval_batch_size": "auto",
                "learning_rate": "auto",
            },
        }

        model = LudwigModel(config, backend=LocalTestBackend())

        # check preconditions
        assert model.config[TRAINER][BATCH_SIZE] == "auto"
        assert model.config[TRAINER][EVAL_BATCH_SIZE] == "auto"
        assert model.config[TRAINER][LEARNING_RATE] == "auto"

        _, _, output_directory = model.train(training_set=data_csv,
                                             validation_set=val_csv,
                                             test_set=test_csv,
                                             output_directory=outdir)

        def check_postconditions(model):
            # check batch size
            assert model.config[TRAINER][BATCH_SIZE] != "auto"
            assert model.config[TRAINER][BATCH_SIZE] > 1

            assert model.config[TRAINER][EVAL_BATCH_SIZE] != "auto"
            assert model.config[TRAINER][EVAL_BATCH_SIZE] > 1

            assert model.config[TRAINER][BATCH_SIZE] == model.config[TRAINER][
                EVAL_BATCH_SIZE]

            # check learning rate
            assert model.config[TRAINER][LEARNING_RATE] != "auto"
            assert model.config[TRAINER][LEARNING_RATE] > 0

        check_postconditions(model)

        model = LudwigModel.load(os.path.join(output_directory, "model"))

        # loaded model should retain the tuned params
        check_postconditions(model)
Example #19
def collect_activations(model_path,
                        layers,
                        dataset,
                        data_format=None,
                        batch_size=128,
                        output_directory='results',
                        gpus=None,
                        gpu_memory_limit=None,
                        allow_parallel_threads=True,
                        use_horovod=None,
                        debug=False,
                        **kwargs):
    """Uses the pretrained model to collect the tensors corresponding to a
    datapoint in the dataset. Saves the tensors to the experiment directory

    :param model_path: Is the model from which the tensors will be collected
    :param layers: List of layer names we wish to collect the output from
    :param data_csv: The CSV filepath which contains the datapoints from which
           the tensors are collected
    :param data_hdf5: The HDF5 file path if the CSV file path does not exist,
           an alternative source of providing the data to the model
    :param split: Split type
    :param batch_size: Batch size
    :param output_directory: Output directory
    :param gpus: The total number of GPUs that the model intends to use
    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to allocate
           per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow to use
           multithreading parallelism to improve performance at the cost of
           determinism.
    :param debug: To step through the stack traces and find possible errors
    :returns: None

    """
    logger.info('Dataset path: {}'.format(dataset))
    logger.info('Model path: {}'.format(model_path))
    logger.info('Output path: {}'.format(output_directory))
    logger.info('\n')

    model = LudwigModel.load(model_path,
                             gpus=gpus,
                             gpu_memory_limit=gpu_memory_limit,
                             allow_parallel_threads=allow_parallel_threads,
                             use_horovod=use_horovod)

    # collect activations
    print_boxed('COLLECT ACTIVATIONS')
    collected_tensors = model.collect_activations(layers,
                                                  dataset,
                                                  data_format=data_format,
                                                  batch_size=batch_size,
                                                  debug=debug)

    # saving
    os.makedirs(output_directory, exist_ok=True)
    saved_filenames = save_tensors(collected_tensors, output_directory)

    logger.info('Saved to: {0}'.format(output_directory))
    return saved_filenames
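A minimal usage sketch; the model path, layer name, and dataset are assumptions:

# Hypothetical call: the layer name is a placeholder; real layer names can be
# inspected on the loaded model.
saved_files = collect_activations("results/experiment_run/model",
                                  ["fc_layer_1"],
                                  "test.csv",
                                  output_directory="activations")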
Example #20
def run_api_experiment(input_features, output_features, data_csv):
    """
    Helper method to avoid code repetition in running an experiment
    :param input_features: input schema
    :param output_features: output schema
    :param data_csv: path to data
    :return: None
    """
    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {
            "type": "concat",
            "fc_size": 14
        },
        "training": {
            "epochs": 2
        },
    }

    model = LudwigModel(config)
    output_dir = None

    try:
        # Training with csv
        _, _, output_dir = model.train(
            dataset=data_csv,
            skip_save_processed_input=True,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
        )
        model.predict(dataset=data_csv)

        model_dir = os.path.join(output_dir, "model")
        loaded_model = LudwigModel.load(model_dir)

        # Necessary before call to get_weights() to materialize the weights
        loaded_model.predict(dataset=data_csv)

        model_weights = model.model.get_weights()
        loaded_weights = loaded_model.model.get_weights()
        for model_weight, loaded_weight in zip(model_weights, loaded_weights):
            assert np.allclose(model_weight, loaded_weight)
    finally:
        # Remove results/intermediate data saved to disk
        shutil.rmtree(output_dir, ignore_errors=True)

    try:
        # Training with dataframe
        data_df = read_csv(data_csv)
        _, _, output_dir = model.train(
            dataset=data_df,
            skip_save_processed_input=True,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
        )
        model.predict(dataset=data_df)
    finally:
        shutil.rmtree(output_dir, ignore_errors=True)
Example #21
def run_test_gbm_number(tmpdir, backend_config):
    """Test that the GBM model can train and predict a numerical output (regression)."""
    # Given a dataset with a single input feature and a single output feature,
    input_features = [number_feature(), category_feature(reduce_output="sum")]
    output_feature = number_feature()
    output_features = [output_feature]

    csv_filename = os.path.join(tmpdir, "training.csv")
    dataset_filename = generate_data(input_features,
                                     output_features,
                                     csv_filename,
                                     num_examples=100)

    config = {
        MODEL_TYPE: "gbm",
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {
            "num_boost_round": 2
        },
    }

    # When I train a model on the dataset, load the model from the output directory, and
    # predict on the dataset
    model = LudwigModel(config, backend=backend_config)

    model.train(
        dataset=dataset_filename,
        output_directory=tmpdir,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
        skip_save_log=True,
    )
    model = LudwigModel.load(
        os.path.join(tmpdir, "api_experiment_run", "model"))
    preds, _ = model.predict(
        dataset=dataset_filename,
        output_directory=os.path.join(tmpdir, "predictions"),
    )

    # Then the predictions should be included in the output
    pred_col = preds[output_feature["name"] + "_predictions"]
    if backend_config["type"] == "ray":
        pred_col = pred_col.compute()
    assert pred_col.dtype == float
Example #22
def predict_test_with_ludwig(self):
    ludwig_model = LudwigModel.load(
        "results/api_experiment_imdb_review/model")
    df = pd.read_table("../../test.tsv", sep='\t')
    predictions = ludwig_model.predict(data_df=df)
    predictions.to_csv(index=False,
                       header=True,
                       path_or_buf="predicted.csv")
    print(predictions)
Example #23
def initialize(cfg):
    pth_mdl = os.path.join(cfg['pth_mdls'], cfg['model_to_load'], "model")
    if not os.path.isdir(pth_mdl):
        raise Exception(
            "Could not find the model specified in the models directory: {}".
            format(pth_mdl))

    global MODL, MMTA, FEAS_IN, FEAS_OUT
    # load a model
    st = time.time()
    print("...loading model from {}".format(pth_mdl))
    MODL = LudwigModel.load(pth_mdl)
    MODL.set_logging_level(logging.ERROR)
    with open(os.path.join(pth_mdl, "train_set_metadata.json")) as f:
        MMTA = json.load(f)
    print("...loaded model in {:.2f}s".format(time.time() - st))

    FEAS_IN = list(MODL.model.hyperparameters['input_features'])
    FEAS_OUT = list(MODL.model.hyperparameters['output_features'])
    input_features_desc, output_features_desc = "", ""
    for fea in FEAS_IN:
        if fea['type'] == "image":
            input_features_desc += "\t{}\t({}: {})\n".format(
                fea['name'], fea['type'], "({}, {}) {}".format(
                    fea['width'], fea['height'],
                    fe.util.chan_count_to_mode(fea['num_channels'])))
        else:
            input_features_desc += "\t{}\t({})\n".format(
                fea['name'], fea['type'])
    for fea in FEAS_OUT:
        if fea['type'] == "category":
            fea['meta'] = MMTA[fea['name']]
            output_features_desc += "\t{}\t(category with {} classes)\n".format(
                fea['name'], fea['num_classes'])
            output_features_desc += "\t\t\t{}\n".format(", ".join(
                fea['meta']['idx2str']))
        else:
            output_features_desc += "\t{}\t({})\n".format(
                fea['name'], fea['type'])

    console_msg = """#################### FRESH EYES ####################
I've just loaded a saved model from {0}
To check that the server is working, go to http://localhost:{1}/
It looks like the model I loaded requires the following inputs to make a prediction:
{2}
Make note of these names, as these particular fields will be required by API calls.
It looks like the following values will result from a prediction.
{3}
Make note of these as well, as these fields will be returned to API calls.
Invoke Cntl+C to stop the server
#################### FRESH EYES ####################
"""
    print(
        console_msg.format(pth_mdl, cfg['port_num'], input_features_desc,
                           output_features_desc))
Example #24
def get_tags(sentence):
    tagger = LudwigModel.load('../tagger')

    sentence = sentence.strip().lower().translate(
        str.maketrans('', '', string.punctuation))
    ret = tagger.predict(
        data_dict={'utterance': [sentence]})['slots_predictions'].values

    tagger.close()

    return ret
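A usage sketch for the tagger helper above; the utterance is illustrative:

# Hypothetical call: tag a raw utterance with the slot-filling model.
print(get_tags("Book a flight from Denver to Boston tomorrow"))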
Example #25
def run_test_gbm_category(tmpdir, backend_config):
    """Test that the GBM model can train and predict a categorical output (multiclass classification)."""
    input_features = [number_feature(), category_feature(reduce_output="sum")]
    vocab_size = 3
    output_feature = category_feature(vocab_size=vocab_size)
    output_features = [output_feature]

    csv_filename = os.path.join(tmpdir, "training.csv")
    dataset_filename = generate_data(input_features,
                                     output_features,
                                     csv_filename,
                                     num_examples=100)

    config = {
        MODEL_TYPE: "gbm",
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {
            "num_boost_round": 2
        },
    }

    model = LudwigModel(config, backend=backend_config)

    _, _, output_directory = model.train(
        dataset=dataset_filename,
        output_directory=tmpdir,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
        skip_save_log=True,
    )
    model = LudwigModel.load(
        os.path.join(tmpdir, "api_experiment_run", "model"))
    preds, _ = model.predict(dataset=dataset_filename,
                             output_directory=output_directory)

    prob_col = preds[output_feature["name"] + "_probabilities"]
    if backend_config["type"] == "ray":
        prob_col = prob_col.compute()
    assert len(prob_col.iloc[0]) == (vocab_size + 1)
    assert prob_col.apply(sum).mean() == pytest.approx(1.0)
Example #26
def make_inference():
    trained_model = LudwigModel.load("results/exp_run/model")
    # Raw strings keep Windows-path backslashes from being read as escapes.
    prediction = trained_model.predict(
        {
            "image_path": [
                r"D:\programs\python\exampl\hand_recognition\infer/right (24).jpg",
                r"D:\programs\python\exampl\hand_recognition\infer\left (26).jpg",
            ],
            "label": [-1, -1],
        }
    )
    print(prediction[0]["label_predictions"].values)
Example #27
def run_test_with_ludwig(model_path, test_file_csv):
    """
    Wrap around Ludwig testing.

    :param model_path: path in which already trained model is
    :param test_file_csv: path to csv file with test data points
    :return: predictions from the model and dictionary with stats for human debug
    """
    model = LudwigModel.load(model_path)
    predictions, test_stats = model.test(data_csv=test_file_csv)
    model.close()

    return predictions, test_stats
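A minimal usage sketch; both paths are illustrative assumptions:

# Hypothetical call: evaluate an assumed saved model on an assumed test CSV.
predictions, test_stats = run_test_with_ludwig("results/experiment_run/model",
                                               "data/test.csv")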
Example #28
    def train(self):
        training_dataframe, model_definition = self._create_ludwig_dataframe('train')
        if self.transaction.lmd['model_order_by'] is None:
            timeseries_cols = []
        else:
            timeseries_cols = list(map(lambda x: x[0], self.transaction.lmd['model_order_by']))

        if len(timeseries_cols) > 0:
            training_dataframe, model_definition = self._translate_df_to_timeseries_format(training_dataframe, model_definition, timeseries_cols, 'train')

        with disable_ludwig_output(True):

            model = LudwigModel(model_definition)

            # <---- Ludwig is currently broken: the model can't be initialized
            # without train_set_metadata, and train_set_metadata can't be
            # obtained without running train. See this issue for updates:
            # https://github.com/uber/ludwig/issues/295
            #model.initialize_model(train_set_metadata={})
            #train_stats = model.train_online(data_df=training_dataframe)  # ??Where to add model_name?? ----> model_name=self.transaction.lmd['name']

            if self.transaction.lmd['rebuild_model'] is True:
                train_stats = model.train(data_df=training_dataframe, model_name=self.transaction.lmd['name'], skip_save_model=True)
            else:
                model = LudwigModel.load(self.transaction.lmd['ludwig_data']['ludwig_save_path'])
                train_stats = model.train(data_df=training_dataframe, model_name=self.transaction.lmd['name'], skip_save_model=True)
                #,model_load_path=self.transaction.lmd['ludwig_data']['ludwig_save_path'])

            for k in train_stats['train']:
                if k not in self.transaction.lmd['model_accuracy']['train']:
                    self.transaction.lmd['model_accuracy']['train'][k] = []
                    self.transaction.lmd['model_accuracy']['test'][k] = []
                elif k != 'combined':
                    # We should add the accuracy here, but it is only available
                    # for 'combined'; using that only affects multi-output
                    # scenarios anyway.
                    pass
                else:
                    self.transaction.lmd['model_accuracy']['train'][k].extend(train_stats['train'][k]['accuracy'])
                    self.transaction.lmd['model_accuracy']['test'][k].extend(train_stats['test'][k]['accuracy'])

                '''
                @ TRAIN ONLINE BIT That's not working
                model = LudwigModel.load(self.transaction.lmd['ludwig_data']['ludwig_save_path'])
                for i in range(0,100):
                    train_stats = model.train_online(data_df=training_dataframe)
                    # The resulting train_stats are "None"... wonderful -_-
                '''

            ludwig_model_savepath = os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.transaction.lmd['name'] + '_ludwig_data')

        model.save(ludwig_model_savepath)
        model.close()

        self.transaction.lmd['ludwig_data'] = {'ludwig_save_path': ludwig_model_savepath}
        self.transaction.hmd['ludwig_data'] = {'model_definition': model_definition}
Example #29
def do_predictions(prediction_dictionary, target_folder):
    # reload the model
    model = LudwigModel.load(target_folder)
    # get predictions
    predictions = model.predict(data_dict=prediction_dictionary)
    for input_q, input_skus, output in zip(prediction_dictionary['query'],
                                           prediction_dictionary['skus_in_session'],
                                           predictions['path_predictions']):
        print("\nInput: <{}, {}>, predicted path: {}".format(
            input_q,
            input_skus,
            ' > '.join([o for o in output if o != '<PAD>'])))

    return
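A minimal usage sketch; the dictionary keys mirror those read inside do_predictions, while the values and model folder are made up for illustration:

# Hypothetical call: one query/session pair through the saved model.
do_predictions({"query": ["running shoes"],
                "skus_in_session": ["sku123 sku456"]},
               "saved_model_dir")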
Example #30
def predict_test_with_ludwig(self):
    ludwig_model = LudwigModel.load(
        "results/api_experiment_cancer_diagnosis/model")
    df = pd.read_csv("../../processed_data/test.csv")
    df.rename(columns={
        "concave points_se": "concave_points_se",
        "concave points_worst": "concave_points_worst",
    }, inplace=True)
    predictions = ludwig_model.predict(data_df=df)
    predictions.to_csv(index=False,
                       header=True,
                       path_or_buf="predicted.csv")
    print(predictions)