Exemplo n.º 1
0
def get_experiment_dir_name(output_directory,
                            experiment_name,
                            model_name='run',
                            append_suffix=True):
    """Build the path of the directory where an experiment's results go.

    On the master process the output directory is created when it does not
    exist yet (including missing parent directories). The returned
    experiment directory itself is NOT created by this function.

    :param output_directory: Directory that contains all the results.
    :type output_directory: str
    :param experiment_name: Name of the experiment, first part of the
           directory name.
    :type experiment_name: str
    :param model_name: Name of the model, appended to the experiment name
           after an underscore. If empty or None nothing is appended.
    :type model_name: str
    :param append_suffix: If True a numeric suffix ``_<n>`` is appended,
           where ``n`` is the smallest non-negative integer for which no
           directory with the resulting name exists yet.
    :type append_suffix: bool

    :returns: The path of the experiment directory.
    """
    results_dir = output_directory
    if is_on_master():
        # makedirs creates missing parents as well, and exist_ok avoids
        # failing when another process creates the directory concurrently
        os.makedirs(results_dir, exist_ok=True)

    # create a base dir name; a missing model name adds nothing to it
    base_name = experiment_name
    if model_name:
        base_name = base_name + '_' + model_name
    base_dir_name = os.path.join(results_dir, base_name)

    if not append_suffix:
        return base_dir_name

    # look for the first suffix that is not used by an existing directory
    suffix = 0
    dir_name = '{base}_{suffix}'.format(base=base_dir_name, suffix=suffix)
    while os.path.isdir(dir_name):
        suffix += 1
        dir_name = '{base}_{suffix}'.format(base=base_dir_name,
                                            suffix=suffix)

    return dir_name
Exemplo n.º 2
0
    def postprocess_results(
            output_feature,
            result,
            metadata,
            experiment_dir_name,
            skip_save_unprocessed_output=False,
    ):
        """Convert the raw predictions to an array and optionally store them.

        :param output_feature: Output feature definition; only its 'name'
               entry is read here.
        :param result: Container with the raw outputs. Its PREDICTIONS
               entry, when present and non-empty, is consumed and removed.
        :param metadata: Not used by this implementation.
        :param experiment_dir_name: Directory where the .npy file is written.
        :param skip_save_unprocessed_output: If True nothing is written to
               disk. It is forced to True on non-master processes.

        :returns: A dictionary holding, under PREDICTIONS, the value of
                  ``result[PREDICTIONS].numpy()`` (when predictions exist).
        """
        feature_name = output_feature['name']

        # only the master process is allowed to write files
        if is_on_master():
            npy_template = os.path.join(experiment_dir_name, '{}_{}.npy')
        else:
            npy_template = None
            skip_save_unprocessed_output = True

        outputs = {}
        if PREDICTIONS not in result or len(result[PREDICTIONS]) == 0:
            return outputs

        outputs[PREDICTIONS] = result[PREDICTIONS].numpy()
        if not skip_save_unprocessed_output:
            target_file = npy_template.format(feature_name, PREDICTIONS)
            np.save(target_file, result[PREDICTIONS])
        del result[PREDICTIONS]

        return outputs
Exemplo n.º 3
0
def predict(dataset,
            train_set_metadata,
            model,
            model_definition,
            batch_size=128,
            evaluate_performance=True,
            debug=False):
    """Run the model on a dataset and gather predictions and statistics.

        :param dataset: Dataset the predictions are computed on.
        :type dataset: Dataset
        :param train_set_metadata: Metadata handed over to
               calculate_overall_stats when performance is evaluated.
        :param model: The trained model whose ``predict`` method is called.
        :type model: Model
        :param model_definition: The model definition; its 'output_features'
               entry is used when performance is evaluated.
        :type model_definition: Dictionary
        :param batch_size: The size of batches when computing the predictions.
        :type batch_size: Integer
        :param evaluate_performance: Forwarded to ``model.predict``. If True,
               calculate_overall_stats is also run on the merged results,
               which requires ground truth for the output features.
        :type evaluate_performance: Bool
        :param debug: Not used by the body of this function.
        :type debug: Boolean

        :returns: A dictionary mapping each output feature name to its
                  statistics merged with its predictions (logits removed).
        """
    if is_on_master():
        print_boxed('PREDICT')

    test_stats, test_predictions = model.predict(
        dataset, batch_size, evaluate_performance=evaluate_performance)

    # model.predict may hand back nothing usable as stats
    test_stats = test_stats if test_stats else {}

    for of_name in test_predictions:
        of_predictions = test_predictions[of_name]

        # logits are dropped, they are not needed for the overall stats
        if LOGITS in of_predictions:
            del of_predictions[LOGITS]

        # predictions take precedence over stats on key collisions
        of_stats = test_stats[of_name] if of_name in test_stats else {}
        test_stats[of_name] = {**of_stats, **of_predictions}

    if evaluate_performance:
        calculate_overall_stats(
            test_stats,
            model_definition['output_features'],
            dataset,
            train_set_metadata
        )

    return test_stats
Exemplo n.º 4
0
def predict(
        dataset,
        train_set_metadata,
        model,
        model_definition,
        batch_size=128,
        only_predictions=False,
        gpus=None,
        gpu_fraction=1.0,
        debug=False
):
    """Run the model on a dataset and return what ``model.predict`` yields.

        :param dataset: Dataset containing the data the predictions are
               computed on.
        :type dataset: Dataset
        :param train_set_metadata: Metadata handed over to
               calculate_overall_stats when metrics are computed.
        :param model: The trained model whose ``predict`` method is called.
        :type model: Model
        :param model_definition: The model definition; its 'output_features'
               entry is used when metrics are computed.
        :type model_definition: Dictionary
        :param batch_size: The size of batches when computing the predictions.
        :type batch_size: Integer
        :param only_predictions: Forwarded to ``model.predict``. If False,
               calculate_overall_stats is also run on the results, which
               requires the data to contain ground truth for the output
               features.
        :type only_predictions: Bool
        :param gpus: Forwarded to ``model.predict``.
        :type gpus: List
        :param gpu_fraction: Forwarded to ``model.predict``.
        :param debug: Not used by the body of this function.
        :type debug: Boolean

        :returns: The object returned by ``model.predict``, after
                  calculate_overall_stats has been applied to it when
                  only_predictions is False.
        """
    if is_on_master():
        print_boxed('PREDICT')

    predict_kwargs = {
        'only_predictions': only_predictions,
        'gpus': gpus,
        'gpu_fraction': gpu_fraction,
    }
    test_stats = model.predict(dataset, batch_size, **predict_kwargs)

    if only_predictions:
        return test_stats

    output_features = model_definition['output_features']
    calculate_overall_stats(test_stats, output_features, dataset,
                            train_set_metadata)
    return test_stats
Exemplo n.º 5
0
def get_file_names(experiment_dir_name):
    """Return the standard file and directory paths of an experiment.

    On the master process the experiment directory is created when nothing
    exists at that path yet (missing parent directories included).

    :param experiment_dir_name: Path of the experiment directory.
    :type experiment_dir_name: str

    :returns: A tuple with the path of the description JSON file, the path
              of the training statistics JSON file and the path of the
              model directory, all located inside the experiment directory.
    """
    if is_on_master() and not os.path.exists(experiment_dir_name):
        # makedirs handles nested paths; exist_ok tolerates the directory
        # being created by someone else between the check and this call
        os.makedirs(experiment_dir_name, exist_ok=True)

    description_fn = os.path.join(experiment_dir_name, 'description.json')
    training_stats_fn = os.path.join(experiment_dir_name,
                                     'training_statistics.json')
    model_dir = os.path.join(experiment_dir_name, 'model')

    return description_fn, training_stats_fn, model_dir
Exemplo n.º 6
0
    def postprocess_results(
        output_feature,
        result,
        metadata,
        experiment_dir_name,
        skip_save_unprocessed_output=False,
    ):
        """Postprocess raw predictions and probabilities of this feature.

        :param output_feature: Output feature definition; its 'name' and
               'threshold' entries are read here.
        :param result: Container with the raw outputs. Its PREDICTIONS and
               PROBABILITIES entries, when present and non-empty, are
               consumed and removed.
        :param metadata: Feature metadata. If it contains 'idx2str', each
               prediction set is replaced by the list of 'idx2str' entries
               at the positions whose value equals True.
        :param experiment_dir_name: Directory where .npy files are written.
        :param skip_save_unprocessed_output: If True nothing is written to
               disk. It is forced to True on non-master processes.

        :returns: A dictionary with the postprocessed PREDICTIONS,
                  PROBABILITIES and PROBABILITY entries (those available).
        """
        feature_name = output_feature['name']

        # only the master process is allowed to write files
        if is_on_master():
            npy_template = os.path.join(experiment_dir_name, '{}_{}.npy')
        else:
            npy_template = None
            skip_save_unprocessed_output = True

        outputs = {}

        if PREDICTIONS in result and len(result[PREDICTIONS]) > 0:
            raw_predictions = result[PREDICTIONS]

            if 'idx2str' not in metadata:
                outputs[PREDICTIONS] = raw_predictions
            else:
                decoded_sets = []
                for pred_set in raw_predictions:
                    decoded_sets.append([
                        metadata['idx2str'][position]
                        for position, flag in enumerate(pred_set)
                        if flag == True
                    ])
                outputs[PREDICTIONS] = decoded_sets

            if not skip_save_unprocessed_output:
                np.save(
                    npy_template.format(feature_name, PREDICTIONS),
                    raw_predictions
                )

            del result[PREDICTIONS]

        if PROBABILITIES in result and len(result[PROBABILITIES]) > 0:
            all_probs = result[PROBABILITIES].numpy()

            # per row, keep only the values that reach the threshold
            kept_probs = []
            for prob_set in all_probs:
                kept_probs.append([
                    value for value in prob_set
                    if value >= output_feature['threshold']
                ])

            outputs[PROBABILITIES] = all_probs
            outputs[PROBABILITY] = kept_probs

            if not skip_save_unprocessed_output:
                np.save(
                    npy_template.format(feature_name, PROBABILITIES),
                    all_probs
                )
                # NOTE(review): the PROBABILITY file receives the full
                # probabilities array, not the thresholded lists returned
                # under PROBABILITY - confirm this is intended
                np.save(
                    npy_template.format(feature_name, PROBABILITY),
                    all_probs
                )

            del result[PROBABILITIES]

        return outputs
Exemplo n.º 7
0
def _is_provided(data_source):
    """Tell whether a dataset argument (DataFrame or path) was passed."""
    if data_source is None:
        return False
    if isinstance(data_source, str):
        # an empty path counts as "not provided"
        return bool(data_source)
    # anything else (e.g. a DataFrame) is never evaluated for truthiness,
    # which is ambiguous and raises for pandas objects
    return True


def hyperopt(
    model_definition=None,
    model_definition_file=None,
    data_df=None,
    data_train_df=None,
    data_validation_df=None,
    data_test_df=None,
    data_csv=None,
    data_train_csv=None,
    data_validation_csv=None,
    data_test_csv=None,
    data_hdf5=None,
    data_train_hdf5=None,
    data_validation_hdf5=None,
    data_test_hdf5=None,
    train_set_metadata_json=None,
    experiment_name="hyperopt",
    model_name="run",
    # model_load_path=None,
    # model_resume_path=None,
    skip_save_training_description=True,
    skip_save_training_statistics=True,
    skip_save_model=True,
    skip_save_progress=True,
    skip_save_log=True,
    skip_save_processed_input=True,
    skip_save_unprocessed_output=True,
    skip_save_test_predictions=True,
    skip_save_test_statistics=True,
    skip_save_hyperopt_statistics=False,
    output_directory="results",
    gpus=None,
    gpu_memory_limit=None,
    allow_parallel_threads=True,
    use_horovod=False,
    random_seed=default_random_seed,
    debug=False,
    **kwargs,
):
    """Validate the hyperopt configuration and run the hyperopt executor.

    The model definition (given directly or loaded from a YAML file) is
    merged with the defaults and its hyperopt section is validated against
    the provided data and the output features. A sampler and an executor
    are then built from that section and the executor is run. On the
    master process the results are printed and, unless disabled, saved to
    the output directory.

    :param model_definition: Model definition; must contain a hyperopt
           section. Mutually exclusive with model_definition_file.
    :type model_definition: Dictionary
    :param model_definition_file: Path of a YAML file containing the model
           definition. Mutually exclusive with model_definition.
    :type model_definition_file: str
    :param data_train_df, data_train_csv, data_train_hdf5,
           data_validation_df, data_validation_csv, data_validation_hdf5,
           data_test_df, data_test_csv, data_test_hdf5: Per-split data
           sources. They are used to validate that data exists for the
           split hyperopt optimizes on, and are forwarded to the executor.
    :param skip_save_hyperopt_statistics: If True the hyperopt statistics
           are not saved to output_directory.
    :type skip_save_hyperopt_statistics: Boolean
    :param output_directory: Directory where hyperopt statistics are saved;
           also forwarded to the executor.
    :type output_directory: str

    All remaining parameters, ``kwargs`` included, are forwarded unchanged
    to the executor's ``execute`` method.

    :returns: The results returned by the hyperopt executor.
    :raises ValueError: If none or both of model_definition and
            model_definition_file are given, if the hyperopt section is
            missing, or if its split / output_feature / metric combination
            is not valid.
    """
    # check for model_definition and model_definition_file
    if model_definition is None and model_definition_file is None:
        raise ValueError(
            "Either model_definition or model_definition_file has to be "
            "not None to initialize a LudwigModel")
    if model_definition is not None and model_definition_file is not None:
        raise ValueError("Only one between model_definition and "
                         "model_definition_file can be provided")

    # merge with default model definition to set defaults
    if model_definition_file is not None:
        with open(model_definition_file, "r") as def_file:
            model_definition = yaml.safe_load(def_file)
    model_definition = merge_with_defaults(model_definition)

    if HYPEROPT not in model_definition:
        raise ValueError("Hyperopt Section not present in Model Definition")

    hyperopt_config = model_definition["hyperopt"]
    update_hyperopt_params_with_defaults(hyperopt_config)

    # print hyperopt config
    logger.info(pformat(hyperopt_config, indent=4))
    logger.info('\n')

    sampler = hyperopt_config["sampler"]
    executor = hyperopt_config["executor"]
    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    ######################
    # check validity of output_feature / metric/ split combination
    ######################
    # for each split: index into split_probabilities and its data sources
    split_data = {
        TRAINING: (0, (data_train_df, data_train_csv, data_train_hdf5)),
        VALIDATION: (1, (data_validation_df, data_validation_csv,
                         data_validation_hdf5)),
        TEST: (2, (data_test_df, data_test_csv, data_test_hdf5)),
    }
    if split not in split_data:
        raise ValueError('unrecognized hyperopt split "{}". '
                         'Please provide one of: {}'.format(
                             split, {TRAINING, VALIDATION, TEST}))

    split_index, split_sources = split_data[split]
    # the split is usable if explicit data was passed for it or if the
    # preprocessing section assigns it a positive share of the data
    if not any(_is_provided(source) for source in split_sources) and (
            model_definition['preprocessing']['split_probabilities'][
                split_index] <= 0):
        raise ValueError(
            'The data for the specified split for hyperopt "{}" '
            'was not provided, '
            'or the split amount specified in the preprocessing section '
            'of the model definition is not greater than 0'.format(split))

    if output_feature == COMBINED:
        if metric != LOSS:
            raise ValueError(
                'The only valid metric for "combined" output feature is "loss"'
            )
    else:
        output_feature_names = set(
            of['name'] for of in model_definition['output_features'])
        if output_feature not in output_feature_names:
            raise ValueError('The output feature specified for hyperopt "{}" '
                             'cannot be found in the model definition. '
                             'Available ones are: {} and "combined"'.format(
                                 output_feature, output_feature_names))

        output_feature_type = None
        for of in model_definition['output_features']:
            if of['name'] == output_feature:
                output_feature_type = of['type']
        feature_class = get_from_registry(output_feature_type,
                                          output_type_registry)
        if metric not in feature_class.metric_functions:
            # TODO allow users to specify also metrics from the overall
            #  and per class metrics from the training stats and in general
            #  and postprocessed metric
            available_metrics = list(feature_class.metric_functions)
            raise ValueError(
                'The specified metric for hyperopt "{}" is not a valid metric '
                'for the specified output feature "{}" of type "{}". '
                'Available metrics are: {}'.format(metric, output_feature,
                                                   output_feature_type,
                                                   available_metrics))

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal,
                                                                   parameters,
                                                                   **sampler)
    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_results = hyperopt_executor.execute(
        model_definition,
        data_df=data_df,
        data_train_df=data_train_df,
        data_validation_df=data_validation_df,
        data_test_df=data_test_df,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        train_set_metadata_json=train_set_metadata_json,
        experiment_name=experiment_name,
        model_name=model_name,
        # model_load_path=None,
        # model_resume_path=None,
        skip_save_training_description=skip_save_training_description,
        skip_save_training_statistics=skip_save_training_statistics,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        skip_save_processed_input=skip_save_processed_input,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_test_predictions=skip_save_test_predictions,
        skip_save_test_statistics=skip_save_test_statistics,
        output_directory=output_directory,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        use_horovod=use_horovod,
        random_seed=random_seed,
        debug=debug,
        **kwargs)

    if is_on_master():
        print_hyperopt_results(hyperopt_results)

        if not skip_save_hyperopt_statistics:
            if not os.path.exists(output_directory):
                # exist_ok tolerates a concurrent creation of the directory
                os.makedirs(output_directory, exist_ok=True)

            hyperopt_stats = {
                'hyperopt_config': hyperopt_config,
                'hyperopt_results': hyperopt_results
            }

            save_hyperopt_stats(hyperopt_stats, output_directory)
            logger.info('Hyperopt stats saved to: {}'.format(output_directory))

    logger.info('Finished hyperopt')

    return hyperopt_results
Exemplo n.º 8
0
def cli(sys_argv):
    """Command-line entry point of ``ludwig hyperopt``.

    Parses the arguments, sets the level of the 'ludwig' logger, rebinds
    the module-level ``logger``, configures the master process and calls
    ``hyperopt`` with every parsed option as a keyword argument.

    :param sys_argv: List of command-line arguments to parse (without the
           program name).
    :type sys_argv: List
    """
    parser = argparse.ArgumentParser(
        description="This script searches for optimal Hyperparameters",
        prog="ludwig hyperopt",
        usage="%(prog)s [options]",
    )

    # -------------------
    # Hyperopt parameters
    # -------------------
    parser.add_argument(
        "-sshs",
        "--skip_save_hyperopt_statistics",
        help="skips saving hyperopt statistics file",
        action="store_true",
        default=False,
    )

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument(
        "--output_directory",
        type=str,
        default="results",
        help="directory that contains the results",
    )
    parser.add_argument("--experiment_name",
                        type=str,
                        default="hyperopt",
                        help="experiment name")
    parser.add_argument("--model_name",
                        type=str,
                        default="run",
                        help="name for the model")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--data_csv",
        help="input data CSV file. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--data_train_csv", help="input train data CSV file")
    parser.add_argument("--data_validation_csv",
                        help="input validation data CSV file")
    parser.add_argument("--data_test_csv", help="input test data CSV file")

    parser.add_argument(
        "--data_hdf5",
        help="input data HDF5 file. It is an intermediate preprocess version of"
        " the input CSV created the first time a CSV file is used in the "
        "same directory with the same name and a hdf5 extension",
    )
    parser.add_argument(
        "--data_train_hdf5",
        help="input train data HDF5 file. It is an intermediate preprocess "
        "version of the input CSV created the first time a CSV file is "
        "used in the same directory with the same name and a hdf5 "
        "extension",
    )
    parser.add_argument(
        "--data_validation_hdf5",
        help="input validation data HDF5 file. It is an intermediate preprocess"
        " version of the input CSV created the first time a CSV file is "
        "used in the same directory with the same name and a hdf5 "
        "extension",
    )
    parser.add_argument(
        "--data_test_hdf5",
        help="input test data HDF5 file. It is an intermediate preprocess "
        "version of the input CSV created the first time a CSV file is "
        "used in the same directory with the same name and a hdf5 "
        "extension",
    )

    parser.add_argument(
        "--train_set_metadata_json",
        help="input metadata JSON file. It is an intermediate preprocess file "
        "containing the mappings of the input CSV created the first time a"
        " CSV file is used in the same directory with the same name and a "
        "json extension",
    )

    parser.add_argument(
        "-sspi",
        "--skip_save_processed_input",
        help="skips saving intermediate HDF5 and JSON files",
        action="store_true",
        default=False,
    )

    # ----------------
    # Model parameters
    # ----------------
    # exactly one of the two ways to pass the model definition is required
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument("-md",
                                  "--model_definition",
                                  type=yaml.safe_load,
                                  help="model definition")
    model_definition.add_argument(
        "-mdf",
        "--model_definition_file",
        help="YAML file describing the model. Ignores --model_hyperparameters",
    )

    parser.add_argument(
        "-mlp",
        "--model_load_path",
        help="path of a pretrained model to load as initialization",
    )
    parser.add_argument(
        "-mrp",
        "--model_resume_path",
        help="path of the model directory to resume training of",
    )
    parser.add_argument(
        "-sstd",
        "--skip_save_training_description",
        action="store_true",
        default=False,
        help="disables saving the description JSON file",
    )
    parser.add_argument(
        "-ssts",
        "--skip_save_training_statistics",
        action="store_true",
        default=False,
        help="disables saving training statistics JSON file",
    )
    parser.add_argument(
        "-ssm",
        "--skip_save_model",
        action="store_true",
        default=False,
        help="disables saving weights each time the model improves. "
        "By default Ludwig saves weights after each epoch "
        "the validation metric improves, but if the model is really big "
        "that can be time consuming. If you do not want to keep "
        "the weights and just find out what performance a model can get "
        "with a set of hyperparameters, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssp",
        "--skip_save_progress",
        action="store_true",
        default=False,
        help="disables saving weights after each epoch. By default ludwig saves "
        "weights after each epoch for enabling resuming of training, but "
        "if the model is really big that can be time consuming and will "
        "use twice as much space, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssl",
        "--skip_save_log",
        action="store_true",
        default=False,
        help="disables saving TensorBoard logs. By default Ludwig saves "
        "logs for the TensorBoard, but if it is not needed turning it off "
        "can slightly increase the overall speed",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument("-g",
                        "--gpus",
                        nargs="+",
                        type=int,
                        default=None,
                        help="list of gpus to use")
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        "-uh",
        "--use_horovod",
        action="store_true",
        default=False,
        help="uses horovod for distributed training",
    )
    parser.add_argument(
        "-dbg",
        "--debug",
        action="store_true",
        default=False,
        help="enables debugging mode",
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    args = parser.parse_args(sys_argv)

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    # rebind the module-level logger used by the functions of this module
    global logger
    logger = logging.getLogger('ludwig.hyperopt')

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig("Hyperopt", LUDWIG_VERSION)

    # every parsed option is handed over as a keyword argument
    hyperopt(**vars(args))
Exemplo n.º 9
0
def cli(sys_argv):
    """Command-line entry point of ``ludwig experiment``.

    Parses the arguments, sets the level of the 'ludwig' logger, rebinds
    the module-level ``logger``, configures the master process and runs
    either ``full_experiment`` or, when ``--k_fold`` is given,
    ``full_kfold_cross_validate``, passing every parsed option as a
    keyword argument.

    :param sys_argv: List of command-line arguments to parse (without the
           program name).
    :type sys_argv: List
    """
    parser = argparse.ArgumentParser(
        description='This script trains and tests a model',
        prog='ludwig experiment',
        usage='%(prog)s [options]')

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument('--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('--experiment_name',
                        type=str,
                        default='experiment',
                        help='experiment name')
    parser.add_argument('--model_name',
                        type=str,
                        default='run',
                        help='name for the model')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--data_csv',
        help='input data CSV file. If it has a split column, it will be used '
        'for splitting (0: train, 1: validation, 2: test), otherwise the '
        'dataset will be randomly split')
    parser.add_argument('--data_train_csv', help='input train data CSV file')
    parser.add_argument('--data_validation_csv',
                        help='input validation data CSV file')
    parser.add_argument('--data_test_csv', help='input test data CSV file')

    parser.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
        ' the input CSV created the first time a CSV file is used in the '
        'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--data_train_hdf5',
        help='input train data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_validation_hdf5',
        help='input validation data HDF5 file. It is an intermediate preprocess'
        ' version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_test_hdf5',
        help='input test data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')

    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file'
        ' containing the mappings of the input CSV created the first time '
        'a CSV file is used in the same directory with the same name and a'
        ' json extension')

    parser.add_argument('-sspi',
                        '--skip_save_processed_input',
                        help='skips saving intermediate HDF5 and JSON files',
                        action='store_true',
                        default=False)
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)

    # -----------------
    # K-fold parameters
    # -----------------
    parser.add_argument(
        '-kf',
        '--k_fold',
        type=int,
        default=None,
        help='number of folds for a k-fold cross validation run')
    parser.add_argument(
        '-skfsi',
        '--skip_save_k_fold_split_indices',
        action='store_true',
        default=False,
        help='disables saving indices generated to split training data set '
        'for the k-fold cross validation run, but if it is not needed '
        'turning it off can slightly increase the overall speed')

    # ----------------
    # Model parameters
    # ----------------
    # exactly one of the two ways to pass the model definition is required
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument('-md',
                                  '--model_definition',
                                  type=yaml.safe_load,
                                  help='model definition')
    model_definition.add_argument(
        '-mdf',
        '--model_definition_file',
        help='YAML file describing the model. Ignores --model_hyperparameters')

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization')
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of')
    parser.add_argument('-sstd',
                        '--skip_save_training_description',
                        action='store_true',
                        default=False,
                        help='disables saving the description JSON file')
    parser.add_argument('-ssts',
                        '--skip_save_training_statistics',
                        action='store_true',
                        default=False,
                        help='disables saving training statistics JSON file')
    parser.add_argument('-sstp',
                        '--skip_save_test_predictions',
                        help='skips saving test predictions CSV files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sstes',
                        '--skip_save_test_statistics',
                        help='skips saving test statistics JSON file',
                        action='store_true',
                        default=False)
    parser.add_argument(
        '-ssm',
        '--skip_save_model',
        action='store_true',
        default=False,
        help='disables saving model weights and hyperparameters each time '
        'the model improves. '
        'By default Ludwig saves model weights after each epoch '
        'the validation metric improves, but if the model is really big '
        'that can be time consuming. If you do not want to keep '
        'the weights and just find out what performance a model can get '
        'with a set of hyperparameters, use this parameter to skip it, '
        'but the model will not be loadable later on')
    parser.add_argument(
        '-ssp',
        '--skip_save_progress',
        action='store_true',
        default=False,
        help='disables saving progress each epoch. By default Ludwig saves '
        'weights and stats after each epoch for enabling resuming '
        'of training, but if the model is really big that can be '
        'time consuming and will use twice as much space, use '
        'this parameter to skip it, but training cannot be resumed '
        'later on')
    parser.add_argument(
        '-ssl',
        '--skip_save_log',
        action='store_true',
        default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
        'logs for the TensorBoard, but if it is not needed turning it off '
        'can slightly increase the overall speed')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    # the flag is stored inverted, under the name allow_parallel_threads
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=None,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    # rebind the module-level logger used by the functions of this module
    global logger
    logger = logging.getLogger('ludwig.experiment')

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Experiment', LUDWIG_VERSION)

    # every parsed option is handed over as a keyword argument
    if args.k_fold is None:
        full_experiment(**vars(args))
    else:
        full_kfold_cross_validate(**vars(args))
Exemplo n.º 10
0
def full_experiment(model_definition,
                    model_definition_file=None,
                    data_df=None,
                    data_train_df=None,
                    data_validation_df=None,
                    data_test_df=None,
                    data_csv=None,
                    data_train_csv=None,
                    data_validation_csv=None,
                    data_test_csv=None,
                    data_hdf5=None,
                    data_train_hdf5=None,
                    data_validation_hdf5=None,
                    data_test_hdf5=None,
                    train_set_metadata_json=None,
                    experiment_name='experiment',
                    model_name='run',
                    model_load_path=None,
                    model_resume_path=None,
                    skip_save_training_description=False,
                    skip_save_training_statistics=False,
                    skip_save_model=False,
                    skip_save_progress=False,
                    skip_save_log=False,
                    skip_save_processed_input=False,
                    skip_save_unprocessed_output=False,
                    skip_save_test_predictions=False,
                    skip_save_test_statistics=False,
                    output_directory='results',
                    gpus=None,
                    gpu_memory_limit=None,
                    allow_parallel_threads=True,
                    use_horovod=None,
                    random_seed=default_random_seed,
                    debug=False,
                    **kwargs):
    """Run an experiment end to end and store the test-split outputs.

    The heavy lifting (training and producing test results) is delegated to
    ``experiment``. When a test split is available, its results are
    postprocessed and, on the master process only, printed and written to
    the experiment directory. Finally the ``"experiment_save"`` contrib
    command is issued and the experiment directory name is returned.

    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
    :type model_definition: Dictionary
    :param model_definition_file: YAML file that specifies the model
           definition.
    :type model_definition_file: filepath (str)
    :param data_df: Forwarded unchanged to ``experiment``; presumably an
           in-memory dataframe with the whole dataset (TODO confirm).
    :param data_train_df: Forwarded unchanged to ``experiment``; presumably
           the training dataframe (TODO confirm).
    :param data_validation_df: Forwarded unchanged to ``experiment``;
           presumably the validation dataframe (TODO confirm).
    :param data_test_df: Forwarded unchanged to ``experiment``; presumably
           the test dataframe (TODO confirm).
    :param data_csv: CSV file with the input data used to train, validate
           and test a model. It either contains a split column or will be
           split.
    :type data_csv: filepath (str)
    :param data_train_csv: CSV file with the data used to train a model.
    :type data_train_csv: filepath (str)
    :param data_validation_csv: CSV file with the data used to validate
           a model.
    :type data_validation_csv: filepath (str)
    :param data_test_csv: CSV file with the data used to test a model.
    :type data_test_csv: filepath (str)
    :param data_hdf5: Dataset in hdf5 format, used instead of the csv file.
    :type data_hdf5: filepath (str)
    :param data_train_hdf5: Training set in hdf5 format, used instead of
           the csv file.
    :type data_train_hdf5: filepath (str)
    :param data_validation_hdf5: Validation set in hdf5 format, used
           instead of the csv file.
    :type data_validation_hdf5: filepath (str)
    :param data_test_hdf5: Test set in hdf5 format, used instead of
           the csv file.
    :type data_test_hdf5: filepath (str)
    :param train_set_metadata_json: JSON metadata file associated with
           a dataset in hdf5 format.
    :type train_set_metadata_json: filepath (str)
    :param experiment_name: The name for the experiment.
    :type experiment_name: Str
    :param model_name: Name of the model that is being used.
    :type model_name: Str
    :param model_load_path: If specified, the loaded model is used as
           initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param model_resume_path: Resumes training from the given path. Unlike
           ``model_load_path``, training statistics such as the current
           epoch, loss and performance so far are resumed too, continuing
           a previously interrupted training process.
    :type model_resume_path: filepath (str)
    :param skip_save_training_description: Disables saving the description
           JSON file.
    :type skip_save_training_description: Boolean
    :param skip_save_training_statistics: Disables saving the training
           statistics JSON file.
    :type skip_save_training_statistics: Boolean
    :param skip_save_model: Disables saving model weights and
           hyperparameters each time the model improves. Saves time for
           very big models, but the model will not be loadable later on.
    :type skip_save_model: Boolean
    :param skip_save_progress: Disables saving progress (weights and
           stats) after each epoch. Saves time and space for very big
           models, but training cannot be resumed later on.
    :type skip_save_progress: Boolean
    :param skip_save_log: Disables saving TensorBoard logs, which can
           slightly increase the overall speed.
    :type skip_save_log: Boolean
    :param skip_save_processed_input: Controls whether the hdf5/json copy
           of a preprocessed CSV dataset is saved. Going by its name, True
           presumably skips the save (TODO confirm in the preprocessing
           code; this function only forwards the value).
    :type skip_save_processed_input: Boolean
    :param skip_save_unprocessed_output: If True the raw numpy outputs are
           not requested from ``postprocess``; they are also never
           requested on non-master processes.
    :type skip_save_unprocessed_output: Boolean
    :param skip_save_test_predictions: Skips saving test predictions.
    :type skip_save_test_predictions: Boolean
    :param skip_save_test_statistics: Skips saving test statistics.
    :type skip_save_test_statistics: Boolean
    :param output_directory: The directory that will contain the training
           statistics, the saved model and the training progress files.
    :type output_directory: filepath (str)
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_memory_limit: Maximum memory in MB to allocate per GPU
           device.
    :type gpu_memory_limit: Integer
    :param allow_parallel_threads: Allow TensorFlow to use multithreading
           parallelism to improve performance at the cost of determinism.
    :type allow_parallel_threads: Boolean
    :param use_horovod: Flag for using horovod; also passed to
           ``set_on_master`` before anything else runs.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks.
    :type debug: Boolean
    :param kwargs: Any extra keyword arguments, forwarded to ``experiment``.
    :returns: The experiment directory name reported by ``experiment``.
    """
    set_on_master(use_horovod)

    # Everything collected here is handed to ``experiment`` untouched.
    experiment_options = dict(
        model_definition_file=model_definition_file,
        data_df=data_df,
        data_train_df=data_train_df,
        data_validation_df=data_validation_df,
        data_test_df=data_test_df,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        train_set_metadata_json=train_set_metadata_json,
        experiment_name=experiment_name,
        model_name=model_name,
        model_load_path=model_load_path,
        model_resume_path=model_resume_path,
        skip_save_training_description=skip_save_training_description,
        skip_save_training_statistics=skip_save_training_statistics,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        skip_save_processed_input=skip_save_processed_input,
        output_directory=output_directory,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        use_horovod=use_horovod,
        random_seed=random_seed,
        debug=debug,
        **kwargs)

    # The trained model and the training statistics are not needed here.
    (
        _model,
        preprocessed_data,
        experiment_dir_name,
        _train_stats,
        model_definition,
        test_results,
    ) = experiment(model_definition, **experiment_options)

    # Only the test split and the metadata are used from the 4-tuple.
    _, _, test_set, train_set_metadata = preprocessed_data

    if test_set is not None:
        # The output dir is only required when at least one artifact is kept,
        # and only the master process writes to disk.
        if (is_on_master()
                and not (skip_save_unprocessed_output
                         and skip_save_test_predictions
                         and skip_save_test_statistics)
                and not os.path.exists(experiment_dir_name)):
            os.makedirs(experiment_dir_name)

        output_features = model_definition['output_features']
        # Non-master processes never ask for the raw outputs to be saved.
        skip_raw_output = skip_save_unprocessed_output or not is_on_master()
        postprocessed_output = postprocess(
            test_results,
            output_features,
            train_set_metadata,
            experiment_dir_name,
            skip_raw_output)

        if is_on_master():
            print_test_results(test_results)
            if not skip_save_test_predictions:
                save_prediction_outputs(
                    postprocessed_output, experiment_dir_name)
            if not skip_save_test_statistics:
                save_test_statistics(test_results, experiment_dir_name)

    if is_on_master():
        finished_msg = '\nFinished: {0}_{1}'.format(
            experiment_name, model_name)
        logger.info(finished_msg)
        logger.info('Saved to: {}'.format(experiment_dir_name))

    contrib_command("experiment_save", experiment_dir_name)
    return experiment_dir_name
Exemplo n.º 11
0
def experiment(
        model_definition,
        model_definition_file=None,
        data_csv=None,
        data_train_csv=None,
        data_validation_csv=None,
        data_test_csv=None,
        data_hdf5=None,
        data_train_hdf5=None,
        data_validation_hdf5=None,
        data_test_hdf5=None,
        train_set_metadata_json=None,
        experiment_name='experiment',
        model_name='run',
        model_load_path=None,
        model_resume_path=None,
        skip_save_progress_weights=False,
        skip_save_processed_input=False,
        skip_save_unprocessed_output=False,
        output_directory='results',
        gpus=None,
        gpu_fraction=1.0,
        use_horovod=False,
        random_seed=default_random_seed,
        debug=False,
        **kwargs
):
    """Trains a model on a dataset's training and validation splits and
    uses it to predict on the test split.
    It saves the trained model and the statistics of training and testing.
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
           Ignored when model_definition_file is given.
    :type model_definition: Dictionary
    :param model_definition_file: The file that specifies the model definition.
           It is a yaml file.
    :type model_definition_file: filepath (str)
    :param data_csv: A CSV file containing the input data which is used to
           train, validate and test a model. The CSV either contains a
           split column or will be split.
    :type data_csv: filepath (str)
    :param data_train_csv: A CSV file containing the input data which is used
           to train a model.
    :type data_train_csv: filepath (str)
    :param data_validation_csv: A CSV file containing the input data which is
           used to validate a model.
    :type data_validation_csv: filepath (str)
    :param data_test_csv: A CSV file containing the input data which is used
           to test a model.
    :type data_test_csv: filepath (str)
    :param data_hdf5: If the dataset is in the hdf5 format, this is used instead
           of the csv file.
    :type data_hdf5: filepath (str)
    :param data_train_hdf5: If the training set is in the hdf5 format, this is
           used instead of the csv file.
    :type data_train_hdf5: filepath (str)
    :param data_validation_hdf5: If the validation set is in the hdf5 format,
           this is used instead of the csv file.
    :type data_validation_hdf5: filepath (str)
    :param data_test_hdf5: If the test set is in the hdf5 format, this is
           used instead of the csv file.
    :type data_test_hdf5: filepath (str)
    :param train_set_metadata_json: If the dataset is in hdf5 format, this is
           the associated json file containing metadata.
    :type train_set_metadata_json: filepath (str)
    :param experiment_name: The name for the experiment.
    :type experiment_name: Str
    :param model_name: Name of the model that is being used.
    :type model_name: Str
    :param model_load_path: If this is specified the loaded model will be used
           as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param model_resume_path: Resumes training of the model from the path
           specified. The difference with model_load_path is that also training
           statistics like the current epoch and the loss and performance so
           far are also resumed effectively continuing a previously interrupted
           training process. If the path does not exist, training starts
           from scratch in a new experiment directory.
    :type model_resume_path: filepath (str)
    :param skip_save_progress_weights: Skips saving the weights at the end of
           each epoch. If this is true, training cannot be resumed from
           exactly the state at the end of the previous epoch.
    :type skip_save_progress_weights: Boolean
    :param skip_save_processed_input: If a CSV dataset is provided it is
           preprocessed and then saved as an hdf5 and json to avoid running
           the preprocessing again. This flag is forwarded to
           preprocess_for_training to control that saving; going by its name,
           True presumably skips it (TODO confirm in the preprocessing code).
    :type skip_save_processed_input: Boolean
    :param skip_save_unprocessed_output: By default predictions and
           their probabilities are saved in both raw unprocessed numpy files
           containing tensors and as postprocessed CSV files
           (one for each output feature). If this parameter is True,
           only the CSV ones are saved and the numpy ones are skipped.
    :type skip_save_unprocessed_output: Boolean
    :param output_directory: The directory that will contain the training
           statistics, the saved model and the training progress files.
    :type output_directory: filepath (str)
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Flag for using horovod, forwarded to train.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks.
    :type debug: Boolean
    :returns: The name of the directory the experiment was saved to.
    """
    # set input features defaults
    if model_definition_file is not None:
        with open(model_definition_file, 'r') as def_file:
            # safe_load: calling yaml.load without an explicit Loader can
            # construct arbitrary Python objects from the file and is
            # rejected outright by recent PyYAML releases.
            model_definition = merge_with_defaults(yaml.safe_load(def_file))
    else:
        model_definition = merge_with_defaults(model_definition)

    # setup directories and file names
    experiment_dir_name = None
    if model_resume_path is not None:
        if os.path.exists(model_resume_path):
            experiment_dir_name = model_resume_path
        else:
            if is_on_master():
                logging.info(
                    'Model resume path does not exists, '
                    'starting training from scratch'
                )
            # fall back to a fresh run
            model_resume_path = None

    if model_resume_path is None:
        if is_on_master():
            experiment_dir_name = get_experiment_dir_name(
                output_directory,
                experiment_name,
                model_name
            )
        else:
            # non-master processes get a placeholder directory; every save
            # below is guarded by is_on_master()
            experiment_dir_name = '/'
    description_fn, training_stats_fn, model_dir = get_file_names(
        experiment_dir_name
    )

    # save description
    description = get_experiment_description(
        model_definition,
        data_csv,
        data_train_csv,
        data_validation_csv,
        data_test_csv,
        data_hdf5,
        data_train_hdf5,
        data_validation_hdf5,
        data_test_hdf5,
        train_set_metadata_json,
        random_seed
    )
    if is_on_master():
        save_json(description_fn, description)
        # print description
        logging.info('Experiment name: {}'.format(experiment_name))
        logging.info('Model name: {}'.format(model_name))
        logging.info('Output path: {}'.format(experiment_dir_name))
        logging.info('')
        for key, value in description.items():
            logging.info('{}: {}'.format(key, pformat(value, indent=4)))
        logging.info('')

    # preprocess
    (
        training_set,
        validation_set,
        test_set,
        train_set_metadata
    ) = preprocess_for_training(
        model_definition,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        train_set_metadata_json=train_set_metadata_json,
        skip_save_processed_input=skip_save_processed_input,
        preprocessing_params=model_definition[
            'preprocessing'],
        random_seed=random_seed
    )
    if is_on_master():
        logging.info('Training set: {0}'.format(training_set.size))
        if validation_set is not None:
            logging.info('Validation set: {0}'.format(validation_set.size))
        if test_set is not None:
            logging.info('Test set: {0}'.format(test_set.size))

    # update model definition with metadata properties
    update_model_definition_with_metadata(model_definition, train_set_metadata)

    # run the experiment
    model, training_results = train(
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        model_definition=model_definition,
        save_path=model_dir,
        model_load_path=model_load_path,
        resume=model_resume_path is not None,
        skip_save_progress_weights=skip_save_progress_weights,
        gpus=gpus,
        gpu_fraction=gpu_fraction,
        use_horovod=use_horovod,
        random_seed=random_seed,
        debug=debug
    )
    (
        train_trainset_stats,
        train_valisest_stats,
        train_testset_stats
    ) = training_results

    if is_on_master():
        # save train set metadata
        save_json(
            os.path.join(
                model_dir,
                TRAIN_SET_METADATA_FILE_NAME
            ),
            train_set_metadata
        )

    # grab the results of the model with highest validation test performance
    validation_field = model_definition['training']['validation_field']
    validation_measure = model_definition['training']['validation_measure']
    validation_field_result = train_valisest_stats[validation_field]

    best_function = get_best_function(validation_measure)

    # print results of the model with highest validation test performance
    if is_on_master():
        if validation_set is not None:
            # max or min depending on the measure; the pairs are
            # (epoch index, measure value)
            epoch_best_vali_measure, best_vali_measure = best_function(
                enumerate(validation_field_result[validation_measure]),
                key=lambda pair: pair[1]
            )
            # epochs are reported 1-based
            logging.info('Best validation model epoch: {0}'.format(
                epoch_best_vali_measure + 1)
            )
            logging.info(
                'Best validation model {0} on validation set {1}: {2}'.format(
                    validation_measure,
                    validation_field,
                    best_vali_measure)
            )

            if test_set is not None:
                # test-set value of the same measure at the best epoch
                best_vali_measure_epoch_test_measure = train_testset_stats[
                    validation_field
                ][validation_measure][epoch_best_vali_measure]
                logging.info(
                    'Best validation model {0} on test set {1}: {2}'.format(
                        validation_measure,
                        validation_field,
                        best_vali_measure_epoch_test_measure
                    )
                )

    # save training statistics
    if is_on_master():
        save_json(
            training_stats_fn,
            {
                'train': train_trainset_stats,
                'validation': train_valisest_stats,
                'test': train_testset_stats
            }
        )

    if test_set is not None:
        # predict
        test_results = predict(
            test_set,
            train_set_metadata,
            model,
            model_definition,
            model_definition['training']['batch_size'],
            only_predictions=False,
            gpus=gpus,
            gpu_fraction=gpu_fraction,
            debug=debug
        )
        # postprocess; raw outputs are never saved on non-master processes
        postprocessed_output = postprocess(
            test_results,
            model_definition['output_features'],
            train_set_metadata,
            experiment_dir_name,
            skip_save_unprocessed_output or not is_on_master()
        )

        if is_on_master():
            print_prediction_results(test_results)

            save_prediction_outputs(postprocessed_output, experiment_dir_name)
            save_prediction_statistics(test_results, experiment_dir_name)

    model.close_session()

    if is_on_master():
        logging.info('\nFinished: {0}_{1}'.format(
            experiment_name, model_name))
        logging.info('Saved to: {}'.format(experiment_dir_name))

    return experiment_dir_name
Exemplo n.º 12
0
    def postprocess_results(
        output_feature,
        result,
        metadata,
        experiment_dir_name,
        skip_save_unprocessed_output=False,
    ):
        """Convert raw results of one output feature into postprocessed form.

        Handles the ``PREDICTIONS``, ``PROBABILITIES`` and
        ``'predictions_top_k'`` entries of ``result``. Each non-empty entry
        is postprocessed, optionally saved as a ``<name>_<key>.npy`` file in
        ``experiment_dir_name`` and then deleted from ``result`` (the
        ``result`` mapping is mutated in place).

        :param output_feature: Feature definition; only its ``'name'`` entry
               is read, to build the .npy file names.
        :type output_feature: Dictionary
        :param result: Raw results keyed by output kind. The probabilities
               entry must expose ``.numpy()`` (presumably a tensor).
        :type result: Dictionary
        :param metadata: Feature metadata. When it contains ``'idx2str'``,
               predictions are mapped through it (index -> string).
        :type metadata: Dictionary
        :param experiment_dir_name: Directory where the .npy files go.
        :type experiment_dir_name: filepath (str)
        :param skip_save_unprocessed_output: If True no .npy file is
               written. Forced to True on non-master processes.
        :type skip_save_unprocessed_output: Boolean
        :returns: Dictionary with the postprocessed entries that were
               present: ``PREDICTIONS``, ``PROBABILITIES`` plus
               ``PROBABILITY`` (row-wise maximum of the probabilities) and
               ``'predictions_top_k'``.
        """
        postprocessed = {}
        name = output_feature['name']

        # Only the master process writes files; the template is completed
        # with (feature name, output kind) before each save.
        npy_filename = None
        if is_on_master():
            npy_filename = os.path.join(experiment_dir_name, '{}_{}.npy')
        else:
            skip_save_unprocessed_output = True

        if PREDICTIONS in result and len(result[PREDICTIONS]) > 0:
            preds = result[PREDICTIONS]
            if 'idx2str' in metadata:
                postprocessed[PREDICTIONS] = [
                    metadata['idx2str'][pred] for pred in preds
                ]

            else:
                postprocessed[PREDICTIONS] = preds

            if not skip_save_unprocessed_output:
                np.save(npy_filename.format(name, PREDICTIONS), preds)

            del result[PREDICTIONS]

        if PROBABILITIES in result and len(result[PROBABILITIES]) > 0:
            probs = result[PROBABILITIES].numpy()
            # probability of the most likely entry of each row
            prob = np.amax(probs, axis=1)
            postprocessed[PROBABILITIES] = probs
            postprocessed[PROBABILITY] = prob

            if not skip_save_unprocessed_output:
                np.save(npy_filename.format(name, PROBABILITIES), probs)
                # save the per-row maximum, matching postprocessed[PROBABILITY]
                # (the full matrix is already stored in the file above)
                np.save(npy_filename.format(name, PROBABILITY), prob)

            del result[PROBABILITIES]

        if ('predictions_top_k' in result
                and len(result['predictions_top_k']) > 0):

            preds_top_k = result['predictions_top_k']
            if 'idx2str' in metadata:
                postprocessed['predictions_top_k'] = [[
                    metadata['idx2str'][pred] for pred in pred_top_k
                ] for pred_top_k in preds_top_k]
            else:
                postprocessed['predictions_top_k'] = preds_top_k

            if not skip_save_unprocessed_output:
                np.save(npy_filename.format(name, 'predictions_top_k'),
                        preds_top_k)

            del result['predictions_top_k']

        return postprocessed
Exemplo n.º 13
0
def full_predict(model_path,
                 data_csv=None,
                 data_hdf5=None,
                 split=TEST,
                 batch_size=128,
                 skip_save_unprocessed_output=False,
                 skip_save_test_predictions=False,
                 skip_save_test_statistics=False,
                 output_directory='results',
                 evaluate_performance=True,
                 gpus=None,
                 gpu_fraction=1.0,
                 use_horovod=False,
                 debug=False,
                 **kwargs):
    """Load a saved model, predict on a dataset and store the outputs.

    The dataset is preprocessed with the metadata file found inside
    ``model_path``, the model is loaded and run, and its session is closed.
    Postprocessing, printing and saving then happen on the master process
    only; other processes return right after prediction.

    :param model_path: Directory of the saved model; also the location of
           the train set metadata file.
    :type model_path: filepath (str)
    :param data_csv: CSV dataset to predict on; logged as the dataset path
           when given.
    :type data_csv: filepath (str)
    :param data_hdf5: hdf5 dataset, logged as the dataset path when
           ``data_csv`` is None.
    :type data_hdf5: filepath (str)
    :param split: Forwarded to ``preprocess_for_prediction``.
    :param batch_size: Forwarded to ``predict``.
    :type batch_size: Integer
    :param skip_save_unprocessed_output: Forwarded to ``postprocess``
           to skip the raw outputs.
    :type skip_save_unprocessed_output: Boolean
    :param skip_save_test_predictions: Skips saving the prediction outputs.
    :type skip_save_test_predictions: Boolean
    :param skip_save_test_statistics: Skips saving the test statistics.
    :type skip_save_test_statistics: Boolean
    :param output_directory: Base name of the results directory; a suffix
           is added by ``find_non_existing_dir_by_adding_suffix``.
    :type output_directory: filepath (str)
    :param evaluate_performance: Forwarded to preprocessing and prediction;
           when true the test results are printed and, unless skipped,
           their statistics saved.
    :type evaluate_performance: Boolean
    :param gpus: Forwarded to ``predict``.
    :param gpu_fraction: Forwarded to ``predict``.
    :param use_horovod: Forwarded to ``load_model_and_definition``.
    :type use_horovod: Boolean
    :param debug: Forwarded to ``predict``.
    :type debug: Boolean
    :param kwargs: Accepted and ignored.
    :returns: None
    """
    if is_on_master():
        if data_csv is not None:
            dataset_path = data_csv
        else:
            dataset_path = data_hdf5
        logger.info('Dataset path: {}'.format(dataset_path))
        logger.info('Model path: {}'.format(model_path))
        logger.info('')

    # preprocessing relies on the metadata stored alongside the model
    metadata_fp = os.path.join(model_path, TRAIN_SET_METADATA_FILE_NAME)
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path,
        split,
        data_csv,
        data_hdf5,
        metadata_fp,
        evaluate_performance)

    # load the model and run the prediction
    if is_on_master():
        print_boxed('LOADING MODEL')
    model, model_definition = load_model_and_definition(
        model_path, use_horovod=use_horovod)

    prediction_results = predict(
        dataset,
        train_set_metadata,
        model,
        model_definition,
        batch_size,
        evaluate_performance,
        gpus,
        gpu_fraction,
        debug)
    model.close_session()

    # everything from here on is master-only work
    if not is_on_master():
        return

    experiment_dir_name = find_non_existing_dir_by_adding_suffix(
        output_directory)

    # a directory that would stay empty is not worth creating
    nothing_to_save = (skip_save_unprocessed_output
                       and skip_save_test_predictions
                       and skip_save_test_statistics)
    if not nothing_to_save:
        os.makedirs(experiment_dir_name)

    output_features = model_definition['output_features']
    skip_raw_output = skip_save_unprocessed_output or not is_on_master()
    postprocessed_output = postprocess(
        prediction_results,
        output_features,
        train_set_metadata,
        experiment_dir_name,
        skip_raw_output)

    if not skip_save_test_predictions:
        save_prediction_outputs(postprocessed_output, experiment_dir_name)

    if evaluate_performance:
        print_test_results(prediction_results)
        if not skip_save_test_statistics:
            save_test_statistics(prediction_results, experiment_dir_name)

    logger.info('Saved to: {0}'.format(experiment_dir_name))
Exemplo n.º 14
0
def full_predict(model_path,
                 data_csv=None,
                 data_hdf5=None,
                 split='test',
                 batch_size=128,
                 skip_save_unprocessed_output=False,
                 output_directory='results',
                 evaluate_performance=True,
                 gpus=None,
                 gpu_fraction=1.0,
                 use_horovod=False,
                 debug=False,
                 **kwargs):
    """Load a saved model, predict on a dataset and save the outputs.

    An unused output directory name is derived from ``output_directory``,
    the dataset is preprocessed with the metadata file stored inside
    ``model_path``, the model is loaded and run, and its session is closed.
    On the master process the output directory is then created and the
    postprocessed predictions (plus, when ``evaluate_performance`` is true,
    the prediction statistics) are saved into it.

    :param model_path: Directory of the saved model; also the location of
           the train set metadata file.
    :type model_path: filepath (str)
    :param data_csv: CSV dataset to predict on; logged as the dataset path
           when given.
    :type data_csv: filepath (str)
    :param data_hdf5: hdf5 dataset, logged as the dataset path when
           ``data_csv`` is None.
    :type data_hdf5: filepath (str)
    :param split: Forwarded to ``preprocess_for_prediction``.
    :type split: Str
    :param batch_size: Forwarded to ``predict``.
    :type batch_size: Integer
    :param skip_save_unprocessed_output: Forwarded to ``postprocess``
           to skip the raw outputs.
    :type skip_save_unprocessed_output: Boolean
    :param output_directory: Preferred output directory. If it already
           exists, ``_0``, ``_1``, ... are appended until an unused name
           is found. Missing parent directories are created.
    :type output_directory: filepath (str)
    :param evaluate_performance: Forwarded to preprocessing and prediction;
           when true the prediction statistics are printed and saved.
    :type evaluate_performance: Boolean
    :param gpus: Forwarded to ``predict``.
    :param gpu_fraction: Forwarded to ``predict``.
    :param use_horovod: Forwarded to ``load_model_and_definition``.
    :type use_horovod: Boolean
    :param debug: Forwarded to ``predict``.
    :type debug: Boolean
    :param kwargs: Accepted and ignored.
    :returns: None
    :raises OSError: If the chosen output directory has been created by
            someone else between the name lookup and its creation.
    """
    # setup directories and file names: look for an unused directory name
    experiment_dir_name = output_directory
    suffix = 0
    while os.path.exists(experiment_dir_name):
        experiment_dir_name = output_directory + '_' + str(suffix)
        suffix += 1

    if is_on_master():
        logging.info('Dataset path: {}'.format(
            data_csv if data_csv is not None else data_hdf5))
        logging.info('Model path: {}'.format(model_path))
        logging.info('Output path: {}'.format(experiment_dir_name))
        logging.info('')

    train_set_metadata_json_fp = os.path.join(model_path,
                                              TRAIN_SET_METADATA_FILE_NAME)

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path, split, data_csv, data_hdf5, train_set_metadata_json_fp,
        evaluate_performance)

    # run the prediction
    if is_on_master():
        print_boxed('LOADING MODEL')
    model, model_definition = load_model_and_definition(
        model_path, use_horovod=use_horovod)

    prediction_results = predict(dataset, train_set_metadata, model,
                                 model_definition, batch_size,
                                 evaluate_performance, gpus, gpu_fraction,
                                 debug)
    model.close_session()

    if is_on_master():
        # makedirs instead of mkdir: a nested output_directory whose parent
        # does not exist yet must not fail after the whole prediction ran.
        # It still raises if the directory itself already exists.
        os.makedirs(experiment_dir_name)

        # postprocess
        postprocessed_output = postprocess(
            prediction_results, model_definition['output_features'],
            train_set_metadata, experiment_dir_name,
            skip_save_unprocessed_output or not is_on_master())

        save_prediction_outputs(postprocessed_output, experiment_dir_name)

        if evaluate_performance:
            print_prediction_results(prediction_results)
            save_prediction_statistics(prediction_results, experiment_dir_name)

        logging.info('Saved to: {0}'.format(experiment_dir_name))
Exemplo n.º 15
0
def train(training_set,
          validation_set,
          test_set,
          model_definition,
          save_path='model',
          model_load_path=None,
          resume=False,
          skip_save_progress_weights=False,
          gpus=None,
          gpu_fraction=1.0,
          use_horovod=False,
          random_seed=default_random_seed,
          debug=False):
    """
    Builds a new model (or loads an existing one) and trains it.

    :param training_set: Dataset containing training data
    :type training_set: Dataset
    :param validation_set: Dataset containing validation data
    :type validation_set: Dataset
    :param test_set: Dataset containing test data.
    :type test_set: Dataset
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
    :type model_definition: Dictionary
    :param save_path: The path to save the model to.
    :type save_path: filepath (str)
    :param model_load_path: If this is specified the loaded model will be used
           as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param resume: Forwarded unchanged to `model.train` as `resume`.
    :type resume: Boolean
    :param skip_save_progress_weights: Skips saving the weights at the end of
           each epoch. If this is true, training cannot be resumed from
           exactly the state at the end of the previous epoch.
    :type skip_save_progress_weights: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Forwarded to the model loader or to the `Model`
           constructor, whichever is used to obtain the model.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks. Only used
           when a new model is built, not when one is loaded.
    :type debug: Boolean
    :returns: A tuple `(model, train_results)` where `train_results` is the
           value returned by `model.train`.
    """
    if model_load_path is not None:
        # Load model
        if is_on_master():
            print_boxed('LOADING MODEL')
            logging.info('Loading model: {}\n'.format(model_load_path))
        # forward use_horovod so that a loaded model is set up the same way
        # as a freshly built one (the prediction code path does the same)
        model, _ = load_model_and_definition(model_load_path,
                                             use_horovod=use_horovod)
    else:
        # Build model
        if is_on_master():
            print_boxed('BUILDING MODEL', print_fun=logging.debug)
        model = Model(model_definition['input_features'],
                      model_definition['output_features'],
                      model_definition['combiner'],
                      model_definition['training'],
                      model_definition['preprocessing'],
                      use_horovod=use_horovod,
                      random_seed=random_seed,
                      debug=debug)

    # Train model
    if is_on_master():
        print_boxed('TRAINING')
    # the 'training' section of the model definition is expanded into
    # keyword arguments of model.train
    return model, model.train(
        training_set,
        validation_set=validation_set,
        test_set=test_set,
        save_path=save_path,
        resume=resume,
        skip_save_progress_weights=skip_save_progress_weights,
        gpus=gpus,
        gpu_fraction=gpu_fraction,
        random_seed=random_seed,
        **model_definition['training'])
Exemplo n.º 16
0
    def postprocess_results(
        output_feature,
        result,
        metadata,
        experiment_dir_name,
        skip_save_unprocessed_output=False,
    ):
        """
        Converts the raw prediction outputs of a feature into their
        postprocessed form and optionally saves the raw outputs as npy files.

        Every key that is consumed (PREDICTIONS, LAST_PREDICTIONS,
        PROBABILITIES, LENGTHS) is deleted from `result`, so the dictionary
        passed in is modified in place.

        :param output_feature: Output feature definition, its 'name' entry is
               used to build the npy file names.
        :type output_feature: Dictionary
        :param result: Raw outputs of the model for this feature.
        :type result: Dictionary
        :param metadata: Feature metadata. If it contains 'idx2str',
               predicted indices are mapped to strings through it.
        :type metadata: Dictionary
        :param experiment_dir_name: Directory where npy files are written.
        :type experiment_dir_name: filepath (str)
        :param skip_save_unprocessed_output: If true no npy file is written.
               It is forced to true on non master processes.
        :type skip_save_unprocessed_output: Boolean
        :returns: Dictionary with the postprocessed outputs, it may contain
               the PREDICTIONS, LAST_PREDICTIONS and PROBABILITY keys.
        """
        postprocessed = {}
        name = output_feature['name']

        # only the master process writes files, the template is filled with
        # (feature name, output name) right before saving
        npy_filename = None
        if is_on_master():
            npy_filename = os.path.join(experiment_dir_name, '{}_{}.npy')
        else:
            skip_save_unprocessed_output = True

        if PREDICTIONS in result and len(result[PREDICTIONS]) > 0:
            preds = result[PREDICTIONS]
            if 'idx2str' in metadata:
                # indices outside of the vocabulary become UNKNOWN_SYMBOL
                postprocessed[PREDICTIONS] = [[
                    metadata['idx2str'][token]
                    if token < len(metadata['idx2str']) else UNKNOWN_SYMBOL
                    for token in pred
                ] for pred in preds]
            else:
                postprocessed[PREDICTIONS] = preds

            if not skip_save_unprocessed_output:
                np.save(npy_filename.format(name, PREDICTIONS), preds)

            del result[PREDICTIONS]

        if LAST_PREDICTIONS in result and len(result[LAST_PREDICTIONS]) > 0:
            last_preds = result[LAST_PREDICTIONS]
            if 'idx2str' in metadata:
                postprocessed[LAST_PREDICTIONS] = [
                    metadata['idx2str'][last_pred]
                    if last_pred < len(metadata['idx2str']) else UNKNOWN_SYMBOL
                    for last_pred in last_preds
                ]
            else:
                postprocessed[LAST_PREDICTIONS] = last_preds

            if not skip_save_unprocessed_output:
                np.save(npy_filename.format(name, LAST_PREDICTIONS),
                        last_preds)

            del result[LAST_PREDICTIONS]

        if PROBABILITIES in result and len(result[PROBABILITIES]) > 0:
            probs = result[PROBABILITIES].numpy()
            if probs is not None:

                if len(probs) > 0 and isinstance(probs[0], list):
                    prob = []
                    for i in range(len(probs)):
                        # todo: should adapt for the case of beam > 1
                        for j in range(len(probs[i])):
                            probs[i][j] = np.max(probs[i][j])
                        prob.append(np.prod(probs[i]))
                elif isinstance(probs, np.ndarray):
                    # the number of dimensions has to be checked, comparing
                    # the shape tuple itself with 3 is always False and
                    # would skip the reduction over the classes
                    if probs.ndim == 3:  # prob of each class of each token
                        probs = np.amax(probs, axis=-1)
                    # product of the token probabilities of each sequence
                    prob = np.prod(probs, axis=-1)

                # commenting probabilities out because usually it is huge:
                # dataset x length x classes
                # todo: add a mechanism for letting the user decide to save it
                # postprocessed[PROBABILITIES] = probs
                postprocessed[PROBABILITY] = prob

                if not skip_save_unprocessed_output:
                    # commenting probabilities out, see comment above
                    # np.save(npy_filename.format(name, PROBABILITIES), probs)
                    np.save(npy_filename.format(name, PROBABILITY), prob)

            del result[PROBABILITIES]

        if LENGTHS in result:
            del result[LENGTHS]

        return postprocessed
Exemplo n.º 17
0
def train(training_set,
          validation_set,
          test_set,
          model_definition,
          save_path='model',
          model_load_path=None,
          resume=False,
          skip_save_model=False,
          skip_save_progress=False,
          skip_save_log=False,
          gpus=None,
          gpu_fraction=1.0,
          use_horovod=False,
          random_seed=default_random_seed,
          debug=False):
    """
    Builds a new model (or loads an existing one) and trains it.

    :param training_set: Dataset containing training data
    :type training_set: Dataset
    :param validation_set: Dataset containing validation data
    :type validation_set: Dataset
    :param test_set: Dataset containing test data.
    :type test_set: Dataset
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
    :type model_definition: Dictionary
    :param save_path: The path to save the model to.
    :type save_path: filepath (str)
    :param model_load_path: If this is specified the loaded model will be used
           as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param resume: Forwarded unchanged to `model.train` as `resume`.
    :type resume: Boolean
    :param skip_save_model: Disables
           saving model weights and hyperparameters each time the model
           improves. By default Ludwig saves model weights after each epoch
           the validation measure improves, but if the model is really big
           that can be time consuming. If you do not want to keep
           the weights and just find out what performance a model can get
           with a set of hyperparameters, use this parameter to skip it,
           but the model will not be loadable later on.
    :type skip_save_model: Boolean
    :param skip_save_progress: Disables saving
           progress each epoch. By default Ludwig saves weights and stats
           after each epoch for enabling resuming of training, but if
           the model is really big that can be time consuming and will use
           twice as much space, use this parameter to skip it, but training
           cannot be resumed later on.
    :type skip_save_progress: Boolean
    :param skip_save_log: Disables saving TensorBoard
           logs. By default Ludwig saves logs for the TensorBoard, but if it
           is not needed turning it off can slightly increase the
           overall speed.
    :type skip_save_log: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Forwarded to the model loader or to the `Model`
           constructor, whichever is used to obtain the model.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks. Only used
           when a new model is built, not when one is loaded.
    :type debug: Boolean
    :returns: A tuple `(model, train_results)` where `train_results` is the
           value returned by `model.train`.
    """
    if model_load_path is not None:
        # Load model
        if is_on_master():
            print_boxed('LOADING MODEL')
            logger.info('Loading model: {}\n'.format(model_load_path))
        # forward use_horovod so that a loaded model is set up the same way
        # as a freshly built one (the prediction code path does the same)
        model, _ = load_model_and_definition(model_load_path,
                                             use_horovod=use_horovod)
    else:
        # Build model
        if is_on_master():
            print_boxed('BUILDING MODEL', print_fun=logger.debug)

        model = Model(model_definition['input_features'],
                      model_definition['output_features'],
                      model_definition['combiner'],
                      model_definition['training'],
                      model_definition['preprocessing'],
                      use_horovod=use_horovod,
                      random_seed=random_seed,
                      debug=debug)

    # notify contrib integrations that a model is about to be trained
    contrib_command("train_model", model, model_definition, model_load_path)

    # Train model
    if is_on_master():
        print_boxed('TRAINING')
    # the 'training' section of the model definition is expanded into
    # keyword arguments of model.train
    return model, model.train(training_set,
                              validation_set=validation_set,
                              test_set=test_set,
                              save_path=save_path,
                              resume=resume,
                              skip_save_model=skip_save_model,
                              skip_save_progress=skip_save_progress,
                              skip_save_log=skip_save_log,
                              gpus=gpus,
                              gpu_fraction=gpu_fraction,
                              random_seed=random_seed,
                              **model_definition['training'])
Exemplo n.º 18
0
def _preprocess_csv_for_training(
        features,
        data_csv=None,
        data_train_csv=None,
        data_validation_csv=None,
        data_test_csv=None,
        train_set_metadata_json=None,
        skip_save_processed_input=False,
        preprocessing_params=default_preprocessing_parameters,
        random_seed=default_random_seed
):
    """
    Method to pre-process csv data
    :param features: list of all features (input + output)
    :param data_csv: path to the csv data. If provided it takes precedence
    over data_train_csv, data_validation_csv and data_test_csv.
    :param data_train_csv:  training csv data
    :param data_validation_csv: validation csv data
    :param data_test_csv: test csv data
    :param train_set_metadata_json: train set metadata json, if provided it
    is loaded and passed to the dataset building function
    :param skip_save_processed_input: if False, the pre-processed data is saved
    as .hdf5 files in the same location as the csvs with the same names
    (only on the master process).
    :param preprocessing_params: preprocessing parameters
    :param random_seed: random seed
    :return: training, test, validation datasets, training metadata
    :raises ValueError: if neither data_csv nor data_train_csv is provided
    """
    if data_csv is None and data_train_csv is None:
        # without this check the function would fail at the return statement
        # with an UnboundLocalError that hides the actual problem
        raise ValueError(
            'Either data_csv or data_train_csv must be provided'
        )

    train_set_metadata = None
    if train_set_metadata_json is not None:
        train_set_metadata = load_metadata(train_set_metadata_json)

    if data_csv is not None:
        # Use data and ignore _train, _validation and _test.
        # Also ignore data and train set metadata needs preprocessing
        logger.info(
            'Using full raw csv, no hdf5 and json file '
            'with the same name have been found'
        )
        logger.info('Building dataset (it may take a while)')
        data, train_set_metadata = build_dataset(
            data_csv,
            features,
            preprocessing_params,
            train_set_metadata=train_set_metadata,
            random_seed=random_seed
        )
        if is_on_master() and not skip_save_processed_input:
            logger.info('Writing dataset')
            data_hdf5_fp = replace_file_extension(data_csv, 'hdf5')
            data_utils.save_hdf5(data_hdf5_fp, data, train_set_metadata)
            train_set_metadata[DATA_TRAIN_HDF5_FP] = data_hdf5_fp
            logger.info('Writing train set metadata with vocabulary')

            train_set_metadata_json_fp = replace_file_extension(
                data_csv,
                'json'
            )
            data_utils.save_json(
                train_set_metadata_json_fp, train_set_metadata)

        # note the order of the values: training, test, validation
        training_set, test_set, validation_set = split_dataset_tvt(
            data,
            data[SPLIT]
        )

    else:
        # use data_train (including _validation and _test if they are present)
        # and ignore data and train set metadata
        # needs preprocessing
        logger.info(
            'Using training raw csv, no hdf5 and json '
            'file with the same name have been found'
        )
        logger.info('Building dataset (it may take a while)')
        concatenated_df = concatenate_csv(
            data_train_csv,
            data_validation_csv,
            data_test_csv
        )
        concatenated_df.csv = data_train_csv
        data, train_set_metadata = build_dataset_df(
            concatenated_df,
            features,
            preprocessing_params,
            train_set_metadata=train_set_metadata,
            random_seed=random_seed
        )
        # note the order of the values: training, test, validation
        training_set, test_set, validation_set = split_dataset_tvt(
            data,
            data[SPLIT]
        )
        if is_on_master() and not skip_save_processed_input:
            logger.info('Writing dataset')
            data_train_hdf5_fp = replace_file_extension(data_train_csv, 'hdf5')
            data_utils.save_hdf5(
                data_train_hdf5_fp,
                training_set,
                train_set_metadata
            )
            train_set_metadata[DATA_TRAIN_HDF5_FP] = data_train_hdf5_fp

            # the hdf5 file name is derived from the csv one, so a split
            # can be saved only if its csv path has been provided
            if (validation_set is not None and
                    data_validation_csv is not None):
                data_validation_hdf5_fp = replace_file_extension(
                    data_validation_csv,
                    'hdf5'
                )
                data_utils.save_hdf5(
                    data_validation_hdf5_fp,
                    validation_set,
                    train_set_metadata
                )

            if test_set is not None and data_test_csv is not None:
                data_test_hdf5_fp = replace_file_extension(data_test_csv,
                                                           'hdf5')
                data_utils.save_hdf5(
                    data_test_hdf5_fp,
                    test_set,
                    train_set_metadata
                )

            logger.info('Writing train set metadata with vocabulary')
            train_set_metadata_json_fp = replace_file_extension(data_train_csv,
                                                                'json')
            data_utils.save_json(
                train_set_metadata_json_fp,
                train_set_metadata,
            )

    return training_set, test_set, validation_set, train_set_metadata
Exemplo n.º 19
0
def full_train(model_definition,
               model_definition_file=None,
               data_df=None,
               data_train_df=None,
               data_validation_df=None,
               data_test_df=None,
               data_csv=None,
               data_train_csv=None,
               data_validation_csv=None,
               data_test_csv=None,
               data_hdf5=None,
               data_train_hdf5=None,
               data_validation_hdf5=None,
               data_test_hdf5=None,
               train_set_metadata_json=None,
               experiment_name='experiment',
               model_name='run',
               model_load_path=None,
               model_resume_path=None,
               skip_save_model=False,
               skip_save_progress=False,
               skip_save_log=False,
               skip_save_processed_input=False,
               output_directory='results',
               should_close_session=True,
               gpus=None,
               gpu_fraction=1.0,
               use_horovod=False,
               random_seed=42,
               debug=False,
               **kwargs):
    """*full_train* defines the entire training procedure used by Ludwig's
    internals. Requires most of the parameters that are taken into the model.
    Builds a full ludwig model and performs the training.
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
           It is ignored if model_definition_file is specified.
    :type model_definition: Dictionary
    :param model_definition_file: The file that specifies the model definition.
           It is a yaml file.
    :type model_definition_file: filepath (str)
    :param data_df: Forwarded to the preprocessing step as the full dataset
           (presumably a pandas DataFrame, confirm against
           preprocess_for_training).
    :param data_train_df: Forwarded to the preprocessing step as the
           training data (presumably a pandas DataFrame).
    :param data_validation_df: Forwarded to the preprocessing step as the
           validation data (presumably a pandas DataFrame).
    :param data_test_df: Forwarded to the preprocessing step as the
           test data (presumably a pandas DataFrame).
    :param data_csv: A CSV file containing the input data which is used to
           train, validate and test a model. The CSV either contains a
           split column or will be split.
    :type data_csv: filepath (str)
    :param data_train_csv: A CSV file containing the input data which is used
           to train a model.
    :type data_train_csv: filepath (str)
    :param data_validation_csv: A CSV file containing the input data which is
           used to validate a model.
    :type data_validation_csv: filepath (str)
    :param data_test_csv: A CSV file containing the input data which is used
           to test a model.
    :type data_test_csv: filepath (str)
    :param data_hdf5: If the dataset is in the hdf5 format, this is used instead
           of the csv file.
    :type data_hdf5: filepath (str)
    :param data_train_hdf5: If the training set is in the hdf5 format, this is
           used instead of the csv file.
    :type data_train_hdf5: filepath (str)
    :param data_validation_hdf5: If the validation set is in the hdf5 format,
           this is used instead of the csv file.
    :type data_validation_hdf5: filepath (str)
    :param data_test_hdf5: If the test set is in the hdf5 format, this is
           used instead of the csv file.
    :type data_test_hdf5: filepath (str)
    :param train_set_metadata_json: If the dataset is in hdf5 format, this is
           the associated json file containing metadata. It is replaced by
           the metadata file inside model_load_path when that is specified.
    :type train_set_metadata_json: filepath (str)
    :param experiment_name: The name for the experiment.
    :type experiment_name: Str
    :param model_name: Name of the model that is being used.
    :type model_name: Str
    :param model_load_path: If this is specified the loaded model will be used
           as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param model_resume_path: Resumes training of the model from the path
           specified. The difference with model_load_path is that also training
           statistics like the current epoch and the loss and performance so
           far are also resumed, effectively continuing a previously
           interrupted training process. If the path does not exist training
           starts from scratch.
    :type model_resume_path: filepath (str)
    :param skip_save_model: Disables
           saving model weights and hyperparameters each time the model
           improves. By default Ludwig saves model weights after each epoch
           the validation measure improves, but if the model is really big
           that can be time consuming. If you do not want to keep
           the weights and just find out what performance a model can get
           with a set of hyperparameters, use this parameter to skip it,
           but the model will not be loadable later on.
    :type skip_save_model: Boolean
    :param skip_save_progress: Disables saving
           progress each epoch. By default Ludwig saves weights and stats
           after each epoch for enabling resuming of training, but if
           the model is really big that can be time consuming and will use
           twice as much space, use this parameter to skip it, but training
           cannot be resumed later on.
    :type skip_save_progress: Boolean
    :param skip_save_log: Disables saving TensorBoard
           logs. By default Ludwig saves logs for the TensorBoard, but if it
           is not needed turning it off can slightly increase the
           overall speed.
    :type skip_save_log: Boolean
    :param skip_save_processed_input: If a CSV dataset is provided it is
           preprocessed and then saved as an hdf5 and json to avoid running
           the preprocessing again. If this parameter is True,
           the hdf5 and json file are not saved.
    :type skip_save_processed_input: Boolean
    :param output_directory: The directory that will contain the training
           statistics, the saved model and the training progress files.
    :type output_directory: filepath (str)
    :param should_close_session: If true the session of the model is closed
           once the training is finished.
    :type should_close_session: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Forwarded to the train function.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks.
    :type debug: Boolean
    :param kwargs: Additional keyword arguments are accepted and ignored.
    :returns: A tuple (model, preprocessed_data, experiment_dir_name,
           train_stats, model_definition), where preprocessed_data is the
           tuple returned by the preprocessing step and train_stats is a
           dictionary with the 'train', 'validation' and 'test' keys.
    """
    # set input features defaults
    # (a model definition file takes precedence over the dictionary)
    if model_definition_file is not None:
        with open(model_definition_file, 'r') as def_file:
            model_definition = merge_with_defaults(yaml.safe_load(def_file))
    else:
        model_definition = merge_with_defaults(model_definition)

    # setup directories and file names
    experiment_dir_name = None
    if model_resume_path is not None:
        if os.path.exists(model_resume_path):
            experiment_dir_name = model_resume_path
        else:
            if is_on_master():
                logger.info('Model resume path does not exists, '
                            'starting training from scratch')
            # resetting the path makes the code below behave as a new run
            model_resume_path = None

    if model_resume_path is None:
        if is_on_master():
            experiment_dir_name = get_experiment_dir_name(
                output_directory, experiment_name, model_name)
        else:
            # non master processes do not get an experiment directory
            # of their own and use the current directory as a placeholder
            experiment_dir_name = '.'

    # if model_load_path is not None, load its train_set_metadata
    if model_load_path is not None:
        train_set_metadata_json = os.path.join(model_load_path,
                                               TRAIN_SET_METADATA_FILE_NAME)

    description_fn, training_stats_fn, model_dir = get_file_names(
        experiment_dir_name)

    # save description
    description = get_experiment_description(
        model_definition,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        metadata_json=train_set_metadata_json,
        random_seed=random_seed)
    if is_on_master():
        save_json(description_fn, description)
        # print description
        logger.info('Experiment name: {}'.format(experiment_name))
        logger.info('Model name: {}'.format(model_name))
        logger.info('Output path: {}'.format(experiment_dir_name))
        logger.info('\n')
        for key, value in description.items():
            logger.info('{}: {}'.format(key, pformat(value, indent=4)))
        logger.info('\n')

    # preprocess
    preprocessed_data = preprocess_for_training(
        model_definition,
        data_df=data_df,
        data_train_df=data_train_df,
        data_validation_df=data_validation_df,
        data_test_df=data_test_df,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        train_set_metadata_json=train_set_metadata_json,
        skip_save_processed_input=skip_save_processed_input,
        preprocessing_params=model_definition['preprocessing'],
        random_seed=random_seed)

    (training_set, validation_set, test_set,
     train_set_metadata) = preprocessed_data

    if is_on_master():
        logger.info('Training set: {0}'.format(training_set.size))
        if validation_set is not None:
            logger.info('Validation set: {0}'.format(validation_set.size))
        if test_set is not None:
            logger.info('Test set: {0}'.format(test_set.size))

    # update model definition with metadata properties
    update_model_definition_with_metadata(model_definition, train_set_metadata)

    if is_on_master():
        if not skip_save_model:
            # save train set metadata
            os.makedirs(model_dir, exist_ok=True)
            save_json(os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME),
                      train_set_metadata)

    # run the experiment
    model, result = train(training_set=training_set,
                          validation_set=validation_set,
                          test_set=test_set,
                          model_definition=model_definition,
                          save_path=model_dir,
                          model_load_path=model_load_path,
                          resume=model_resume_path is not None,
                          skip_save_model=skip_save_model,
                          skip_save_progress=skip_save_progress,
                          skip_save_log=skip_save_log,
                          gpus=gpus,
                          gpu_fraction=gpu_fraction,
                          use_horovod=use_horovod,
                          random_seed=random_seed,
                          debug=debug)

    # the training result holds the statistics collected on the training,
    # validation and test sets, in this order
    train_trainset_stats, train_valisest_stats, train_testset_stats = result
    train_stats = {
        'train': train_trainset_stats,
        'validation': train_valisest_stats,
        'test': train_testset_stats
    }

    if should_close_session:
        model.close_session()

    if is_on_master():
        # save training and test statistics
        save_json(training_stats_fn, train_stats)

    # grab the results of the model with highest validation test performance
    validation_field = model_definition['training']['validation_field']
    validation_measure = model_definition['training']['validation_measure']
    # NOTE(review): this lookup is performed also when validation_set is
    # None, confirm that the validation statistics always contain the field
    validation_field_result = train_valisest_stats[validation_field]

    best_function = get_best_function(validation_measure)
    # results of the model with highest validation test performance
    if is_on_master() and validation_set is not None:
        # pairs of (epoch index, measure value), the best one is selected
        # by comparing the measure values
        epoch_best_vali_measure, best_vali_measure = best_function(
            enumerate(validation_field_result[validation_measure]),
            key=lambda pair: pair[1])
        # epochs are reported starting from 1
        logger.info(
            'Best validation model epoch: {0}'.format(epoch_best_vali_measure +
                                                      1))
        logger.info(
            'Best validation model {0} on validation set {1}: {2}'.format(
                validation_measure, validation_field, best_vali_measure))
        if test_set is not None:
            best_vali_measure_epoch_test_measure = train_testset_stats[
                validation_field][validation_measure][epoch_best_vali_measure]

            logger.info(
                'Best validation model {0} on test set {1}: {2}'.format(
                    validation_measure, validation_field,
                    best_vali_measure_epoch_test_measure))
        logger.info('\nFinished: {0}_{1}'.format(experiment_name, model_name))
        logger.info('Saved to: {0}'.format(experiment_dir_name))

    # notify contrib integrations that the training outputs have been saved
    contrib_command("train_save", experiment_dir_name)

    return (model, preprocessed_data, experiment_dir_name, train_stats,
            model_definition)
Exemplo n.º 20
0
def cli(sys_argv):
    """
    Command line entry point of `ludwig test`.

    Parses the arguments, sets the logging level of the 'ludwig' logger,
    initializes the master process information and runs full_predict with
    the parsed arguments and evaluate_performance forced to True.

    :param sys_argv: The command line arguments to parse, without the
           program name.
    :type sys_argv: List
    :returns: None
    """
    # adjacent string literals are concatenated as they are, so each
    # fragment has to carry its own separating space
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and tests its performance by comparing '
        'its predictions with ground truth.',
        prog='ludwig test',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    # exactly one of --data_csv and --data_hdf5 has to be provided
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--data_csv',
        help='input data CSV file. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    group.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
        ' the input CSV created the first time a CSV file is used in the '
        'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file '
        'containing the mappings of the input CSV created the first time '
        'a CSV file is used in the same directory with the same name and '
        'a json extension')

    parser.add_argument('-s',
                        '--split',
                        default=TEST,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        type=int,
                        default=0,
                        help='list of gpu to use')
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=False,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)
    # testing always compares the predictions with the ground truth
    args.evaluate_performance = True

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Test', LUDWIG_VERSION)

    # every parsed argument is passed as a keyword argument
    full_predict(**vars(args))
0
def cli(sys_argv):
    """Command-line entry point for ``ludwig train``.

    Builds the argument parser, parses ``sys_argv``, sets the level of the
    ``ludwig`` logger from ``--logging_level``, calls ``set_on_master`` with
    the ``--use_horovod`` flag, prints the banner when ``is_on_master()`` is
    true and finally forwards every parsed option to ``full_train`` as
    keyword arguments.

    Exactly one of ``--model_definition`` / ``--model_definition_file`` must
    be supplied (they form a required mutually exclusive group); argparse
    terminates the process with a usage message otherwise, as it does for
    any other invalid argument.

    :param sys_argv: list of command-line argument strings handed to
        ``ArgumentParser.parse_args``.
    :return: ``None``.
    """
    parser = argparse.ArgumentParser(description='This script trains a model.',
                                     prog='ludwig train',
                                     usage='%(prog)s [options]')

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument('--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('--experiment_name',
                        type=str,
                        default='experiment',
                        help='experiment name')
    parser.add_argument('--model_name',
                        type=str,
                        default='run',
                        help='name for the model')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--data_csv',
        help='input data CSV file. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    parser.add_argument('--data_train_csv', help='input train data CSV file')
    parser.add_argument('--data_validation_csv',
                        help='input validation data CSV file')
    parser.add_argument('--data_test_csv', help='input test data CSV file')

    parser.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
        ' the input CSV created the first time a CSV file is used in the '
        'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--data_train_hdf5',
        help='input train data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_validation_hdf5',
        help='input validation data HDF5 file. It is an intermediate preprocess'
        ' version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_test_hdf5',
        help='input test data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')

    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file '
        'containing the mappings of the input CSV created the first time a'
        ' CSV file is used in the same directory with the same name and a '
        'json extension')

    parser.add_argument('-sspi',
                        '--skip_save_processed_input',
                        help='skips saving intermediate HDF5 and JSON files',
                        action='store_true',
                        default=False)

    # ----------------
    # Model parameters
    # ----------------
    # The inline definition and the definition file are alternatives: the
    # group is required, so exactly one of them has to be given.
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument('-md',
                                  '--model_definition',
                                  type=yaml.safe_load,
                                  help='model definition')
    # NOTE(review): the help text mentions --model_hyperparameters, which
    # this parser does not define -- confirm the intended wording.
    model_definition.add_argument(
        '-mdf',
        '--model_definition_file',
        help='YAML file describing the model. Ignores --model_hyperparameters')

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization')
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of')
    parser.add_argument(
        '-ssm',
        '--skip_save_model',
        action='store_true',
        default=False,
        help='disables saving weights each time the model improves. '
        'By default Ludwig saves weights after each epoch '
        'the validation measure improves, but if the model is really big '
        'that can be time consuming. If you do not want to keep '
        'the weights and just find out what performance can a model get '
        'with a set of hyperparameters, use this parameter to skip it.')
    parser.add_argument(
        '-ssp',
        '--skip_save_progress',
        action='store_true',
        default=False,
        help='disables saving weights after each epoch. By default Ludwig saves '
        'weights after each epoch for enabling resuming of training, but '
        'if the model is really big that can be time consuming and will '
        'use twice as much storage space, use this parameter to skip it.')
    parser.add_argument(
        '-ssl',
        '--skip_save_log',
        action='store_true',
        default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
        'logs for the TensorBoard, but if it is not needed turning it off '
        'can slightly increase the overall speed.')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of gpus to use')
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=False,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    # Translate the textual level chosen on the command line into the value
    # stored in logging_level_registry and apply it to the 'ludwig' logger.
    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Train', LUDWIG_VERSION)

    # Every parsed option is passed through as a keyword argument.
    full_train(**vars(args))
Exemplo n.º 22
0
def cli(sys_argv):
    """Command-line entry point for ``ludwig experiment``.

    Builds the argument parser, parses ``sys_argv``, configures root logging
    to write bare messages to stdout at the level chosen with
    ``--logging_level``, calls ``set_on_master`` with the ``--use_horovod``
    flag, prints the banner when ``is_on_master()`` is true and finally
    forwards every parsed option to ``experiment`` as keyword arguments.

    Exactly one of ``--model_definition`` / ``--model_definition_file`` must
    be supplied (they form a required mutually exclusive group); argparse
    terminates the process with a usage message otherwise, as it does for
    any other invalid argument.

    :param sys_argv: list of command-line argument strings handed to
        ``ArgumentParser.parse_args``.
    :return: ``None``.
    """
    parser = argparse.ArgumentParser(
        description='This script trains and tests a model.',
        prog='ludwig experiment',
        usage='%(prog)s [options]'
    )

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument(
        '--output_directory',
        type=str,
        default='results',
        help='directory that contains the results'
    )
    parser.add_argument(
        '--experiment_name',
        type=str,
        default='experiment',
        help='experiment name'
    )
    parser.add_argument(
        '--model_name',
        type=str,
        default='run',
        help='name for the model'
    )

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--data_csv',
        help='input data CSV file. If it has a split column, it will be used '
             'for splitting (0: train, 1: validation, 2: test), otherwise the '
             'dataset will be randomly split'
    )
    parser.add_argument('--data_train_csv', help='input train data CSV file')
    parser.add_argument(
        '--data_validation_csv',
        help='input validation data CSV file'
    )
    parser.add_argument('--data_test_csv', help='input test data CSV file')

    parser.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
             ' the input CSV created the first time a CSV file is used in the '
             'same directory with the same name and a hdf5 extension'
    )
    parser.add_argument(
        '--data_train_hdf5',
        help='input train data HDF5 file. It is an intermediate preprocess '
             'version of the input CSV created the first time a CSV file is '
             'used in the same directory with the same name and a hdf5 '
             'extension'
    )
    parser.add_argument(
        '--data_validation_hdf5',
        help='input validation data HDF5 file. It is an intermediate preprocess'
             ' version of the input CSV created the first time a CSV file is '
             'used in the same directory with the same name and a hdf5 '
             'extension'
    )
    parser.add_argument(
        '--data_test_hdf5',
        help='input test data HDF5 file. It is an intermediate preprocess '
             'version of the input CSV created the first time a CSV file is '
             'used in the same directory with the same name and a hdf5 '
             'extension'
    )

    parser.add_argument(
        '--metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file'
             ' containing the mappings of the input CSV created the first time '
             'a CSV file is used in the same directory with the same name and a'
             ' json extension'
    )

    parser.add_argument(
        '-sspi',
        '--skip_save_processed_input',
        help='skips saving intermediate HDF5 and JSON files',
        action='store_true',
        default=False
    )
    parser.add_argument(
        '-ssuo',
        '--skip_save_unprocessed_output',
        help='skips saving intermediate NPY output files',
        action='store_true',
        default=False
    )

    # ----------------
    # Model parameters
    # ----------------
    # The inline definition and the definition file are alternatives: the
    # group is required, so exactly one of them has to be given.
    model_definition = parser.add_mutually_exclusive_group(required=True)
    # SECURITY: the inline definition is an arbitrary user-supplied string,
    # so it is parsed with yaml.safe_load. Plain yaml.load can instantiate
    # arbitrary Python objects and needs an explicit Loader on recent PyYAML.
    model_definition.add_argument(
        '-md',
        '--model_definition',
        type=yaml.safe_load,
        help='model definition'
    )
    # NOTE(review): the help text mentions --model_hyperparameters, which
    # this parser does not define -- confirm the intended wording.
    model_definition.add_argument(
        '-mdf',
        '--model_definition_file',
        help='YAML file describing the model. Ignores --model_hyperparameters'
    )

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization'
    )
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of'
    )
    # This is a switch like the other --skip_* options: without
    # action='store_true' argparse would demand a value and any value
    # (even "False") would arrive as a truthy string.
    parser.add_argument(
        '-sspw',
        '--skip_save_progress_weights',
        action='store_true',
        default=False,
        help='does not save weights after each epoch. By default Ludwig saves '
             'weights after each epoch for enabling resuming of training, but '
             'if the model is really big that can be time consuming and will '
             'use twice as much storage space, use this parameter to skip it.'
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
             'to a random number generator: data splitting, parameter '
             'initialization and training set shuffling'
    )
    parser.add_argument(
        '-g',
        '--gpus',
        nargs='+',
        type=int,
        default=None,
        help='list of GPUs to use'
    )
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with'
    )
    parser.add_argument(
        '-uh',
        '--use_horovod',
        action='store_true',
        default=False,
        help='uses horovod for distributed training'
    )
    parser.add_argument(
        '-dbg',
        '--debug',
        action='store_true',
        default=False,
        help='enables debugging mode'
    )
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
    )

    args = parser.parse_args(sys_argv)

    # Root logging goes to stdout with the bare message as the format; the
    # level is looked up in logging_level_registry from the textual choice.
    logging.basicConfig(
        stream=sys.stdout,
        level=logging_level_registry[args.logging_level],
        format='%(message)s'
    )

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Experiment', LUDWIG_VERSION)

    # Every parsed option is passed through as a keyword argument.
    experiment(**vars(args))