Example #1
def export_neuropod(
    ludwig_model_path,
    neuropod_path,
    neuropod_model_name="ludwig_model",
    entrypoint="get_model",
):
    try:
        from neuropod.backends.python.packager import create_python_neuropod
    except ImportError:
        raise RuntimeError(
            'The "neuropod" package is not installed in your environment.')

    data_paths = [
        {
            "path": os.path.join(ludwig_model_path,
                                 MODEL_HYPERPARAMETERS_FILE_NAME),
            "packaged_name": MODEL_HYPERPARAMETERS_FILE_NAME,
        },
        {
            "path": os.path.join(ludwig_model_path,
                                 TRAIN_SET_METADATA_FILE_NAME),
            "packaged_name": TRAIN_SET_METADATA_FILE_NAME,
        },
        {
            "path": os.path.join(ludwig_model_path, "checkpoint"),
            "packaged_name": "checkpoint"
        },
    ]
    for filename in os.listdir(ludwig_model_path):
        if MODEL_WEIGHTS_FILE_NAME in filename:
            data_paths.append({
                "path": os.path.join(ludwig_model_path, filename),
                "packaged_name": filename
            })

    logger.debug(f"data_paths: {data_paths}")

    ludwig_config = load_json(
        os.path.join(ludwig_model_path, MODEL_HYPERPARAMETERS_FILE_NAME))
    training_set_metadata = load_json(
        os.path.join(ludwig_model_path, TRAIN_SET_METADATA_FILE_NAME))

    input_spec = []
    for feature in ludwig_config["input_features"]:
        input_spec.append({
            NAME: feature[NAME],
            "dtype": "str",
            "shape": (None, 1)
        })
    logger.debug(f"input_spec: {input_spec}")

    output_spec = []
    for feature in ludwig_config["output_features"]:
        feature_type = feature[TYPE]
        feature_name = feature[NAME]
        if feature_type == BINARY:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature_name + "_probability",
                "dtype": "float64",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature_name + "_probabilities",
                "dtype": "float64",
                "shape": (None, 2)
            })
        elif feature_type == NUMERICAL:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "float64",
                "shape": (None, 1)
            })
        elif feature_type == CATEGORY:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature_name + "_probability",
                "dtype": "float64",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature_name + "_probabilities",
                "dtype": "float64",
                "shape": (None,
                          training_set_metadata[feature[NAME]]["vocab_size"]),
            })
        elif feature_type == SEQUENCE or feature_type == TEXT:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1)
            })
        elif feature_type == SET:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature_name + "_probability",
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature_name + "_probabilities",
                "dtype": "float64",
                "shape": (None,
                          training_set_metadata[feature[NAME]]["vocab_size"]),
            })
        elif feature_type == VECTOR:
            output_spec.append({
                "name": feature["name"] + "_predictions",
                "dtype": "float64",
                "shape": (None,
                          training_set_metadata[feature[NAME]]["vector_size"]),
            })
        else:
            output_spec.append({
                "name": feature["name"] + "_predictions",
                "dtype": "str",
                "shape": (None, 1)
            })
    logger.debug(f"output_spec: {output_spec}")

    if os.path.exists(neuropod_path):
        if os.path.isfile(neuropod_path):
            logger.warning(f"Removing file: {neuropod_path}")
            os.remove(neuropod_path)
        else:
            logger.warning(f"Removing directory: {neuropod_path}")
            shutil.rmtree(neuropod_path, ignore_errors=True)

    from pathlib import Path

    # NOTE: ludwig_path is a module-level constant in the original source (not
    # shown in this excerpt); its grandparent directory is used below as the
    # python_root that contains the "ludwig" package.
    path = Path(ludwig_path)
    logger.debug(f"python_root: {path.parent.parent}")

    create_python_neuropod(
        neuropod_path=neuropod_path,
        model_name=neuropod_model_name,
        data_paths=data_paths,
        code_path_spec=[{
            "python_root": path.parent.parent,
            "dirs_to_package":
            ["ludwig"],  # Package everything in the python_root
        }],
        entrypoint_package="ludwig.utils.neuropod_utils",
        entrypoint=entrypoint,
        skip_virtualenv=True,
        input_spec=input_spec,
        output_spec=output_spec,
    )
    logger.info(f"Neuropod saved to: {neuropod_path}")
Example #2
def export_neuropod(ludwig_model_path,
                    neuropod_path,
                    neuropod_model_name="ludwig_model"):
    try:
        from neuropod.backends.python.packager import create_python_neuropod
    except ImportError:
        logger.error(
            'The "neuropod" package is not installed in your environment.')
        sys.exit(-1)

    data_paths = [
        {
            "path": os.path.join(ludwig_model_path,
                                 MODEL_HYPERPARAMETERS_FILE_NAME),
            "packaged_name": MODEL_HYPERPARAMETERS_FILE_NAME
        },
        {
            "path": os.path.join(ludwig_model_path,
                                 TRAIN_SET_METADATA_FILE_NAME),
            "packaged_name": TRAIN_SET_METADATA_FILE_NAME
        },
        {
            "path": os.path.join(ludwig_model_path, 'checkpoint'),
            "packaged_name": 'checkpoint'
        },
    ]
    for filename in os.listdir(ludwig_model_path):
        if (MODEL_WEIGHTS_FILE_NAME in filename
                and MODEL_WEIGHTS_PROGRESS_FILE_NAME not in filename):
            data_paths.append({
                "path": os.path.join(ludwig_model_path, filename),
                "packaged_name": filename
            })

    logger.debug('data_paths: {}'.format(data_paths))

    ludwig_model_definition = load_json(
        os.path.join(ludwig_model_path, MODEL_HYPERPARAMETERS_FILE_NAME))
    training_set_metadata = load_json(
        os.path.join(ludwig_model_path, TRAIN_SET_METADATA_FILE_NAME))

    input_spec = []
    for feature in ludwig_model_definition['input_features']:
        input_spec.append({
            "name": feature['name'],
            "dtype": "str",
            "shape": (None, 1)
        })
    logger.debug('input_spec: {}'.format(input_spec))

    output_spec = []
    for feature in ludwig_model_definition['output_features']:
        feature_type = feature['type']
        feature_name = feature['name']
        if feature_type == BINARY:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probabilities',
                "dtype": "float32",
                "shape": (None, 1)
            })
        elif feature_type == NUMERICAL:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "float32",
                "shape": (None, 1)
            })
        elif feature_type == CATEGORY:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probability',
                "dtype": "float32",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probabilities',
                "dtype": "float32",
                "shape": (None,
                          training_set_metadata[feature_name]['vocab_size'])
            })
        elif feature_type == SEQUENCE:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
        elif feature_type == TEXT:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
        elif feature_type == SET:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probability',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probabilities',
                "dtype": "float32",
                "shape": (None,
                          training_set_metadata[feature_name]['vocab_size'])
            })
        elif feature_type == VECTOR:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "float32",
                "shape": (None,
                          training_set_metadata[feature_name]['vector_size'])
            })
        else:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
    logger.debug('output_spec: {}'.format(output_spec))

    if os.path.exists(neuropod_path):
        if os.path.isfile(neuropod_path):
            logger.warning('Removing file: {}'.format(neuropod_path))
            os.remove(neuropod_path)
        else:
            logger.warning('Removing directory: {}'.format(neuropod_path))
            shutil.rmtree(neuropod_path, ignore_errors=True)

    from pathlib import Path
    # ludwig_path is a module-level constant (see the note in Example #1).
    path = Path(ludwig_path)
    logger.debug('python_root: {}'.format(path.parent.parent))

    create_python_neuropod(
        neuropod_path=neuropod_path,
        model_name=neuropod_model_name,
        data_paths=data_paths,
        code_path_spec=[{
            "python_root":
            path.parent.parent,
            "dirs_to_package": [
                "ludwig"  # Package everything in the python_root
            ],
        }],
        entrypoint_package="ludwig.neuropod_export",
        entrypoint="get_model",
        # test_deps=['torch', 'numpy'],
        skip_virtualenv=True,
        input_spec=input_spec,
        output_spec=output_spec)
    logger.info('Neuropod saved to: {}'.format(neuropod_path))
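
Once exported, the package can be loaded back for inference with the neuropod Python API. The sketch below is an assumption about typical Neuropod usage rather than code from the source above: the loader import path and the "text" feature name are illustrative only. Per the input_spec built in both examples, each input feature is fed as string values with shape (None, 1).

# Hypothetical inference sketch against the exported package; the loader
# import path and the "text" feature name are assumptions for illustration.
import numpy as np
from neuropod.loader import load_neuropod

neuropod_model = load_neuropod("ludwig_model.neuropod")
# Inputs are str-typed arrays of shape (n, 1), matching the input_spec above.
results = neuropod_model.infer({"text": np.array([["hello world"]], dtype=str)})
print(results)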