Пример #1
0
def collect_weights(
        model_path,
        tensors,
        output_directory='results',
        debug=False,
        **kwargs
):
    """Loads a model, collects the requested weight tensors and saves them
    to a freshly created output directory.

    :param model_path: Path handed to `load_model_and_definition` to load
           the model
    :param tensors: Names of the tensors to collect, forwarded as-is to
           `model.collect_weights`
    :param output_directory: Base output directory name; the directory that
           is actually used is the one returned by
           `find_non_existing_dir_by_adding_suffix`
    :param debug: Accepted for interface compatibility, not used by this
           function
    :param kwargs: Additional keyword arguments, accepted and ignored
    :returns: Whatever `save_tensors` returns for the collected tensors

    """
    # resolve the output location first so it can be reported in the log
    target_dir = find_non_existing_dir_by_adding_suffix(output_directory)

    logger.info('Model path: {}'.format(model_path))
    logger.info('Output path: {}'.format(target_dir))
    logger.info('\n')

    # only the model is needed here, its definition is discarded
    model, _ = load_model_and_definition(model_path)

    print_boxed('COLLECT WEIGHTS')
    weights = model.collect_weights(tensors)

    # the directory is created only once there is something to write
    os.makedirs(target_dir)
    saved_filenames = save_tensors(weights, target_dir)

    logger.info('Saved to: {0}'.format(target_dir))
    return saved_filenames
Пример #2
0
def get_experiment_dir_name(
        output_directory,
        experiment_name,
        model_name='run'
):
    """Builds the name of a not-yet-existing experiment directory.

    The base name is `<output_directory>/<experiment_name>_<model_name>`.
    When `model_name` is falsy (empty string or `None`) both the `_`
    separator and the model name are left out, so the base name is just
    `<output_directory>/<experiment_name>`.

    :param output_directory: Directory under which the experiment directory
           name is built
    :param experiment_name: Name of the experiment, first part of the
           directory name
    :param model_name: Optional name of the model, appended after an
           underscore; may be an empty string or `None`
    :returns: The value returned by `find_non_existing_dir_by_adding_suffix`
              for the base directory name

    """
    # a falsy model_name (including None) must contribute nothing at all:
    # concatenating None to a string would raise a TypeError
    model_suffix = '_' + model_name if model_name else ''
    base_dir_name = os.path.join(
        output_directory,
        experiment_name + model_suffix
    )
    return find_non_existing_dir_by_adding_suffix(base_dir_name)
Пример #3
0
def full_predict(model_path,
                 data_csv=None,
                 data_hdf5=None,
                 split=TEST,
                 batch_size=128,
                 skip_save_unprocessed_output=False,
                 skip_save_test_predictions=False,
                 skip_save_test_statistics=False,
                 output_directory='results',
                 evaluate_performance=True,
                 gpus=None,
                 gpu_memory_limit=None,
                 allow_parallel_threads=True,
                 use_horovod=None,
                 debug=False,
                 **kwargs):
    """Preprocesses a dataset, loads a model, runs prediction and, on the
    master process only, postprocesses and saves the results.

    :param model_path: Path of the model; also the directory in which the
           training set metadata file is looked up
    :param data_csv: CSV data source forwarded to
           `preprocess_for_prediction`; logged as the dataset path when it
           is not `None`
    :param data_hdf5: HDF5 data source forwarded to
           `preprocess_for_prediction`; logged as the dataset path when
           `data_csv` is `None`
    :param split: Split forwarded to `preprocess_for_prediction`
    :param batch_size: Batch size forwarded to `predict`
    :param skip_save_unprocessed_output: If true, `postprocess` is told to
           skip saving
    :param skip_save_test_predictions: If true, `save_prediction_outputs`
           is not called
    :param skip_save_test_statistics: If true, `save_test_statistics` is
           not called
    :param output_directory: Base output directory name; the directory that
           is actually used is the one returned by
           `find_non_existing_dir_by_adding_suffix`
    :param evaluate_performance: Forwarded to preprocessing and prediction;
           when true the test results are printed and, unless skipped, the
           test statistics are saved
    :param gpus: Forwarded to `load_model_and_definition`
    :param gpu_memory_limit: Forwarded to `load_model_and_definition`
    :param allow_parallel_threads: Forwarded to `load_model_and_definition`
    :param use_horovod: Forwarded to `set_on_master` and
           `load_model_and_definition`
    :param debug: Forwarded to `predict`
    :param kwargs: Additional keyword arguments, accepted and ignored
    :returns: None

    """
    set_on_master(use_horovod)
    if is_on_master():
        dataset_path = data_hdf5 if data_csv is None else data_csv
        logger.info('Dataset path: {}'.format(dataset_path))
        logger.info('Model path: {}'.format(model_path))
        logger.info('')

    metadata_fp = os.path.join(model_path, TRAIN_SET_METADATA_FILE_NAME)

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path,
        split,
        data_csv,
        data_hdf5,
        metadata_fp,
        evaluate_performance
    )

    # model loading and prediction run on every process
    if is_on_master():
        print_boxed('LOADING MODEL')
    model, model_definition = load_model_and_definition(
        model_path,
        use_horovod=use_horovod,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads
    )

    prediction_results = predict(
        dataset,
        train_set_metadata,
        model,
        model_definition,
        batch_size,
        evaluate_performance,
        debug
    )

    # everything below (directories, saving, reporting) is master-only
    if not is_on_master():
        return

    output_dir = find_non_existing_dir_by_adding_suffix(output_directory)

    # the directory is created as soon as at least one kind of output is
    # not skipped, otherwise it would just remain empty
    something_to_save = (not skip_save_unprocessed_output
                         or not skip_save_test_predictions
                         or not skip_save_test_statistics)
    if something_to_save:
        os.makedirs(output_dir)

    # postprocess
    postprocessed_output = postprocess(
        prediction_results,
        model_definition['output_features'],
        train_set_metadata,
        output_dir,
        skip_save_unprocessed_output or not is_on_master()
    )

    if not skip_save_test_predictions:
        save_prediction_outputs(postprocessed_output, output_dir)

    if evaluate_performance:
        print_test_results(prediction_results)
        if not skip_save_test_statistics:
            save_test_statistics(prediction_results, output_dir)

    logger.info('Saved to: {0}'.format(output_dir))
Пример #4
0
def collect_activations(
        model_path,
        tensors,
        data_csv=None,
        data_hdf5=None,
        split=TEST,
        batch_size=128,
        output_directory='results',
        gpus=None,
        gpu_memory_limit=None,
        allow_parallel_threads=True,
        debug=False,
        **kwargs
):
    """Uses the pretrained model to collect the tensors corresponding to a
    datapoint in the dataset. Saves the tensors to the experiment directory

    :param model_path: Is the model from which the tensors will be collected
    :param tensors: List containing the names of the tensors to collect
    :param data_csv: The CSV filepath which contains the datapoints from which
           the tensors are collected
    :param data_hdf5: The HDF5 file path if the CSV file path does not exist,
           an alternative source of providing the data to the model
    :param split: Split type
    :param batch_size: Batch size
    :param output_directory: Output directory
    :param gpus: The total number of GPUs that the model intends to use
    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to allocate
           per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow to use
           multithreading parallelism to improve performance at the cost of
           determinism.
    :param debug: To step through the stack traces and find possible errors
           (accepted for interface compatibility, currently not used by this
           function)
    :returns: The value returned by `save_tensors` for the collected
              tensors, mirroring what `collect_weights` returns

    """
    # setup directories and file names
    experiment_dir_name = find_non_existing_dir_by_adding_suffix(output_directory)

    logger.info('Dataset path: {}'.format(
        data_csv if data_csv is not None else data_hdf5)
    )
    logger.info('Model path: {}'.format(model_path))
    logger.info('Output path: {}'.format(experiment_dir_name))
    logger.info('\n')

    train_set_metadata_fp = os.path.join(
        model_path,
        TRAIN_SET_METADATA_FILE_NAME
    )

    # preprocessing; the training set metadata is not needed afterwards
    dataset, _ = preprocess_for_prediction(
        model_path,
        split,
        data_csv,
        data_hdf5,
        train_set_metadata_fp
    )

    # only the model is needed here, its definition is discarded
    model, _ = load_model_and_definition(
        model_path,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads
    )

    # collect activations
    print_boxed('COLLECT ACTIVATIONS')
    collected_tensors = model.collect_activations(
        dataset,
        tensors,
        batch_size
    )

    # saving; the directory is created only once there is something to write
    os.makedirs(experiment_dir_name)
    saved_filenames = save_tensors(collected_tensors, experiment_dir_name)

    logger.info('Saved to: {0}'.format(experiment_dir_name))
    # hand the result of save_tensors back to the caller instead of
    # discarding it, consistent with collect_weights
    return saved_filenames