Example #1
def full_predict(
        model_path,
        data_csv=None,
        data_hdf5=None,
        dataset_type='generic',
        split='test',
        batch_size=128,
        skip_save_unprocessed_output=False,
        output_directory='results',
        only_predictions=False,
        gpus=None,
        gpu_fraction=1.0,
        use_horovod=False,
        debug=False,
        **kwargs
):
    # setup directories and file names
    experiment_dir_name = output_directory
    suffix = 0
    while os.path.exists(experiment_dir_name):
        experiment_dir_name = output_directory + '_' + str(suffix)
        suffix += 1

    if is_on_master():
        logging.info('Dataset type: {}'.format(dataset_type))
        logging.info('Dataset path: {}'.format(
            data_csv if data_csv is not None else data_hdf5))
        logging.info('Model path: {}'.format(model_path))
        logging.info('Output path: {}'.format(experiment_dir_name))
        logging.info('')

    train_set_metadata_json_fp = os.path.join(
        model_path,
        TRAIN_SET_METADATA_FILE_NAME
    )

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path,
        split,
        dataset_type,
        data_csv,
        data_hdf5,
        train_set_metadata_json_fp,
        only_predictions
    )

    # run the prediction
    if is_on_master():
        print_boxed('LOADING MODEL')
    model, model_definition = load_model_and_definition(model_path,
                                                        use_horovod=use_horovod)

    prediction_results = predict(
        dataset,
        train_set_metadata,
        model,
        model_definition,
        batch_size,
        only_predictions,
        gpus,
        gpu_fraction,
        debug
    )
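    # release the resources held by the model's TensorFlow session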
    model.close_session()

    if is_on_master():
        os.mkdir(experiment_dir_name)

        # postprocess
        postprocessed_output = postprocess(
            prediction_results,
            model_definition['output_features'],
            train_set_metadata,
            experiment_dir_name,
            skip_save_unprocessed_output or not is_on_master()
        )

        save_prediction_outputs(postprocessed_output, experiment_dir_name)

        if not only_predictions:
            print_prediction_results(prediction_results)
            save_prediction_statistics(prediction_results, experiment_dir_name)

        logging.info('Saved to: {0}'.format(experiment_dir_name))
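A minimal usage sketch for this version of full_predict, assuming it can be imported from ludwig.predict (the import path and all file paths below are placeholders/assumptions, not taken from the source above):

from ludwig.predict import full_predict  # import path is an assumption

# Run prediction on a held-out CSV with a previously trained model directory.
# 'results/experiment_run/model' and 'test.csv' are placeholder paths.
full_predict(
    model_path='results/experiment_run/model',
    data_csv='test.csv',
    split='test',
    batch_size=128,
    only_predictions=True,          # skip computing evaluation statistics
    output_directory='predictions'
)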
Example #2
def collect_activations(model_path,
                        tensors,
                        data_csv=None,
                        data_hdf5=None,
                        split='test',
                        batch_size=128,
                        output_directory='results',
                        gpus=None,
                        gpu_fraction=1.0,
                        debug=False,
                        **kwargs):
    """Uses the pretrained model to collect the tensors corresponding to a
    datapoint in the dataset. Saves the tensors to the experiment directory

    :param model_path: Is the model from which the tensors will be collected
    :param tensors: List contaning the names of the tensors to collect
    :param data_csv: The CSV filepath which contains the datapoints from which
           the tensors are collected
    :param data_hdf5: The HDF5 file path if the CSV file path does not exist,
           an alternative source of providing the data to the model
    :param split: Split type
    :param batch_size: Batch size
    :param output_directory: Output directory
    :param gpus: The total number of GPUs that the model intends to use
    :param gpu_fraction: The fraction of each GPU that the model intends on
           using
    :param debug: To step through the stack traces and find possible errors
    :returns: None

    """
    # setup directories and file names
    experiment_dir_name = output_directory
    suffix = 0
    while os.path.exists(experiment_dir_name):
        experiment_dir_name = output_directory + '_' + str(suffix)
        suffix += 1

    logger.info('Dataset path: {}'.format(
        data_csv if data_csv is not None else data_hdf5))
    logger.info('Model path: {}'.format(model_path))
    logger.info('Output path: {}'.format(experiment_dir_name))
    logger.info('\n')

    train_set_metadata_fp = os.path.join(model_path,
                                         TRAIN_SET_METADATA_FILE_NAME)

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path, split, data_csv, data_hdf5, train_set_metadata_fp)

    model, model_definition = load_model_and_definition(model_path)

    # collect activations
    print_boxed('COLLECT ACTIVATIONS')
    collected_tensors = model.collect_activations(dataset,
                                                  tensors,
                                                  batch_size,
                                                  gpus=gpus,
                                                  gpu_fraction=gpu_fraction)

    model.close_session()

    # saving
    os.mkdir(experiment_dir_name)
    save_tensors(collected_tensors, experiment_dir_name)

    logger.info('Saved to: {0}'.format(experiment_dir_name))
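A hedged usage sketch for collect_activations, assuming it can be imported from ludwig.collect; the tensor names and file paths are placeholders that depend on the trained model's graph:

from ludwig.collect import collect_activations  # import path is an assumption

# Collect intermediate tensors for every datapoint in a CSV file and save
# them to the output directory. The tensor name below is a placeholder.
collect_activations(
    model_path='results/experiment_run/model',
    tensors=['utterance/encoder_output'],
    data_csv='test.csv',
    batch_size=128,
    output_directory='activations'
)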
Example #3
File: train.py  Project: wangrandk/ludwig
def train(
        training_set,
        validation_set,
        test_set,
        model_definition,
        save_path='model',
        model_load_path=None,
        resume=False,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        gpus=None,
        gpu_fraction=1.0,
        use_horovod=False,
        random_seed=default_random_seed,
        debug=False
):
    """
    :param training_set: Dataset contaning training data
    :type training_set: Dataset
    :param validation_set: Dataset contaning validation data
    :type validation_set: Datasetk
    :param test_set: Dataset contaning test data.
    :type test_set: Dataset
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
    :type model_definition: Dictionary
    :param save_path: The path to save the model to.
    :type save_path: filepath (str)
    :param model_load_path: If this is specified the loaded model will be used
           as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param skip_save_model: Disables
               saving model weights and hyperparameters each time the model
           improves. By default Ludwig saves model weights after each epoch
           the validation measure imrpvoes, but if the model is really big
           that can be time consuming if you do not want to keep
           the weights and just find out what performance can a model get
           with a set of hyperparameters, use this parameter to skip it,
           but the model will not be loadable later on.
    :type skip_save_model: Boolean
    :param skip_save_progress: Disables saving
           progress each epoch. By default Ludwig saves weights and stats
           after each epoch for enabling resuming of training, but if
           the model is really big that can be time consuming and will uses
           twice as much space, use this parameter to skip it, but training
           cannot be resumed later on.
    :type skip_save_progress: Boolean
    :param skip_save_log: Disables saving TensorBoard
           logs. By default Ludwig saves logs for the TensorBoard, but if it
           is not needed turning it off can slightly increase the
           overall speed..
    :type skip_save_log: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Integer
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks.
    :type debug: Boolean
    :returns: None
    """
    if model_load_path is not None:
        # Load model
        if is_on_master():
            print_boxed('LOADING MODEL')
            logger.info('Loading model: {}\n'.format(model_load_path))
        model, _ = load_model_and_definition(model_load_path)
    else:
        # Build model
        if is_on_master():
            print_boxed('BUILDING MODEL', print_fun=logger.debug)

        model = Model(
            model_definition['input_features'],
            model_definition['output_features'],
            model_definition['combiner'],
            model_definition['training'],
            model_definition['preprocessing'],
            use_horovod=use_horovod,
            random_seed=random_seed,
            debug=debug
        )

    contrib_command("train_model", model, model_definition, model_load_path)

    # Train model
    if is_on_master():
        print_boxed('TRAINING')
    return model, model.train(
        training_set,
        validation_set=validation_set,
        test_set=test_set,
        save_path=save_path,
        resume=resume,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        gpus=gpus, gpu_fraction=gpu_fraction,
        random_seed=random_seed,
        **model_definition['training']
    )
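A hedged sketch of calling train directly, assuming it can be imported from ludwig.train and that training_set, validation_set and test_set are Dataset objects already produced by Ludwig's preprocessing step; the model_definition dict below is purely illustrative and mirrors the keys accessed by the function above:

from ludwig.train import train  # import path is an assumption

# Illustrative definition; the feature names, types and training settings
# here are placeholders and depend on your data.
model_definition = {
    'input_features': [{'name': 'text', 'type': 'text'}],
    'output_features': [{'name': 'class', 'type': 'category'}],
    'combiner': {'type': 'concat'},
    'training': {'epochs': 10, 'batch_size': 128},
    'preprocessing': {}
}

# training_set, validation_set and test_set are assumed to come from the
# preprocessing pipeline (not shown here).
model, training_results = train(
    training_set,
    validation_set,
    test_set,
    model_definition,
    save_path='model',
    random_seed=42
)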
Example #4
def full_predict(model_path,
                 data_csv=None,
                 data_hdf5=None,
                 split=TEST,
                 batch_size=128,
                 skip_save_unprocessed_output=False,
                 skip_save_test_predictions=False,
                 skip_save_test_statistics=False,
                 output_directory='results',
                 evaluate_performance=True,
                 gpus=None,
                 gpu_fraction=1.0,
                 use_horovod=False,
                 debug=False,
                 **kwargs):
    if is_on_master():
        logger.info('Dataset path: {}'.format(
            data_csv if data_csv is not None else data_hdf5))
        logger.info('Model path: {}'.format(model_path))
        logger.info('')

    train_set_metadata_json_fp = os.path.join(model_path,
                                              TRAIN_SET_METADATA_FILE_NAME)

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path, split, data_csv, data_hdf5, train_set_metadata_json_fp,
        evaluate_performance)

    # run the prediction
    if is_on_master():
        print_boxed('LOADING MODEL')
    model, model_definition = load_model_and_definition(
        model_path, use_horovod=use_horovod)

    prediction_results = predict(dataset, train_set_metadata, model,
                                 model_definition, batch_size,
                                 evaluate_performance, gpus, gpu_fraction,
                                 debug)
    model.close_session()

    if is_on_master():
        # setup directories and file names
        experiment_dir_name = find_non_existing_dir_by_adding_suffix(
            output_directory)

        # if we are skipping all saving,
        # there is no need to create a directory that will remain empty
        should_create_exp_dir = not (skip_save_unprocessed_output
                                     and skip_save_test_predictions
                                     and skip_save_test_statistics)
        if should_create_exp_dir:
            os.makedirs(experiment_dir_name)

        # postprocess
        postprocessed_output = postprocess(
            prediction_results, model_definition['output_features'],
            train_set_metadata, experiment_dir_name,
            skip_save_unprocessed_output or not is_on_master())

        if not skip_save_test_predictions:
            save_prediction_outputs(postprocessed_output, experiment_dir_name)

        if evaluate_performance:
            print_test_results(prediction_results)
            if not skip_save_test_statistics:
                save_test_statistics(prediction_results, experiment_dir_name)

        logger.info('Saved to: {0}'.format(experiment_dir_name))
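A hedged usage sketch for this newer signature, again assuming a ludwig.predict import path (paths are placeholders). Setting evaluate_performance=False plays the role that only_predictions=True played in Example #1:

from ludwig.predict import full_predict  # import path is an assumption

# Predict on new, unlabeled data: disable performance evaluation so no ground
# truth column is required, and skip saving the unprocessed numpy outputs.
full_predict(
    model_path='results/experiment_run/model',
    data_csv='new_data.csv',
    evaluate_performance=False,
    skip_save_unprocessed_output=True,
    output_directory='new_predictions'
)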