Пример #1
0
def simulate(simulator_name,
             sample_label,
             theta0=None,
             theta1=None,
             draw_from=None,
             single_theta=False,
             grid_sampling=False,
             generate_joint_ratio=True,
             generate_joint_score=True,
             n_samples_per_theta=1000,
             random_state=None):
    """
    Draws samples from a simulator and saves them as .npy files.

    The output files (theta0_*, theta1_*, x_*, y_* and, if requested, r_xz_* and t_xz_*) are written to
    base_dir + '/goldmine/data/samples/' + simulator_name.

    :param simulator_name: Specifies the simulator. Currently supported are 'galton' and 'epidemiology'.
    :param sample_label: str, label that becomes part of every output filename.
    :param theta0: None or str. If str, name of a .npy file in the simulator's thetas folder that holds the list of
                   theta0 values (the numerator of the likelihood ratio as well as the score reference point). If
                   None, simulator defaults are used for both theta0 and theta1; a given theta1 file is still loaded
                   but then replaced by the defaults.
    :param theta1: None or str. If str, name of a .npy file in the simulator's thetas folder that holds the list of
                   theta1 values (the denominator of the likelihood ratio), with the same shape as theta0.
    :param draw_from: list, either [0], [1], [0, 1], or None. None means [0, 1] if theta1 is available and [0]
                      otherwise. Each entry is stored as the label y of the corresponding samples.
    :param single_theta: bool, forwarded to the simulator's theta_defaults() when theta0 is None. If True, the
                         suffix '_singletheta' is appended to the output filenames.
    :param grid_sampling: bool, if True the default thetas are requested with random=False, otherwise with
                          random=True. Only relevant when theta0 is None.
    :param generate_joint_ratio: bool, whether to ask the simulator for the joint ratio (only if theta1 is given).
    :param generate_joint_score: bool, whether to ask the simulator for the joint score.
    :param n_samples_per_theta: Number of samples per combination of theta0 and theta1. It is split evenly (integer
                                division) between the entries of draw_from.
    :param random_state: Numpy random state, forwarded to the simulator.
    :raises ValueError: if theta0 and theta1 have different shapes, or if draw_from has an unsupported value.
    """

    logging.info('Starting simulation')
    logging.info('  Simulator:            %s', simulator_name)
    logging.info('  Sample:               %s', sample_label)
    logging.info('  theta0:               %s',
                 'default' if theta0 is None else theta0)
    logging.info('  theta1:               %s',
                 'default' if theta1 is None else theta1)
    if theta0 is None:
        if single_theta:
            logging.info('  theta sampling:       single theta')
        else:
            logging.info('  theta sampling:       %s',
                         ('grid' if grid_sampling else 'random'))
    logging.info('  Samples / theta:      %s', n_samples_per_theta)
    logging.info('  Generate joint ratio: %s', generate_joint_ratio)
    logging.info('  Generate joint score: %s', generate_joint_score)

    # Check paths
    create_missing_folders(base_dir, simulator_name)

    simulator = create_simulator(simulator_name)

    # Load data (theta0 / theta1 are filenames at this point)
    if theta0 is not None:
        theta0 = np.load(base_dir + '/goldmine/data/thetas/' + simulator_name +
                         '/' + theta0)

    if theta1 is not None:
        theta1 = np.load(base_dir + '/goldmine/data/thetas/' + simulator_name +
                         '/' + theta1)

    # Filenames
    folder = base_dir + '/goldmine/data/samples/' + simulator_name
    filename = sample_label
    if single_theta:
        filename += '_singletheta'

    # Default thetas
    if theta0 is None:
        theta0, theta1 = simulator.theta_defaults(single_theta=single_theta,
                                                  random=not grid_sampling)

    # Check thetas
    has_theta1 = (theta1 is not None)

    if has_theta1:
        if theta1.shape != theta0.shape:
            # Exceptions do not apply printf-style formatting to their
            # arguments, so the message is formatted explicitly.
            raise ValueError(
                'theta0 and theta1 have different shapes: %s, %s' %
                (theta0.shape, theta1.shape))
        if draw_from is None:
            draw_from = [0, 1]
        if draw_from not in [[0], [1], [0, 1]]:
            raise ValueError(
                'draw_from has value other than [0], [1], [0,1]: %s' %
                (draw_from, ))

    else:
        # Placeholder theta1 filled with NaN so that the loop below can zip
        # theta0 and theta1. np.nan is used because the np.NaN alias was
        # removed in NumPy 2.0.
        theta1 = np.empty_like(theta0)
        theta1[:] = np.nan
        generate_joint_ratio = False

        if draw_from is None:
            draw_from = [0]
        if draw_from not in [[0]]:
            raise ValueError(
                'No theta1, and draw_from has value other than [0]: %s' %
                (draw_from, ))

    n_samples_per_theta_and_draw = n_samples_per_theta // len(draw_from)

    # Data to be generated
    all_theta0 = []
    all_theta1 = []
    all_x = []
    all_y = []
    all_r_xz = []
    all_t_xz = []

    logging.info('Parameter points:')
    logging.info('theta0 = %s', theta0)
    if has_theta1:
        logging.info('theta1 = %s', theta1)

    # Loop over thetas and run simulator
    for theta0_, theta1_ in zip(theta0, theta1):
        for y in draw_from:
            # NOTE(review): every draw uses theta=theta0_, also for y == 1;
            # y is only recorded as a label. Confirm that this is intended.

            if generate_joint_ratio and generate_joint_score:
                x, r_xz, t_xz = simulator.rvs_ratio_score(
                    theta=theta0_,
                    theta0=theta0_,
                    theta1=theta1_,
                    theta_score=theta0_,
                    n=n_samples_per_theta_and_draw,
                    random_state=random_state)
            elif generate_joint_ratio:
                x, r_xz = simulator.rvs_ratio(theta=theta0_,
                                              theta0=theta0_,
                                              theta1=theta1_,
                                              n=n_samples_per_theta_and_draw,
                                              random_state=random_state)
            elif generate_joint_score:
                x, t_xz = simulator.rvs_score(theta=theta0_,
                                              theta_score=theta0_,
                                              n=n_samples_per_theta_and_draw,
                                              random_state=random_state)
            else:
                x = simulator.rvs(theta=theta0_,
                                  n=n_samples_per_theta_and_draw,
                                  random_state=random_state)

            all_theta0 += [theta0_] * n_samples_per_theta_and_draw
            all_theta1 += [theta1_] * n_samples_per_theta_and_draw
            all_x += list(x)
            all_y += [y] * n_samples_per_theta_and_draw
            if generate_joint_ratio:
                all_r_xz += list(r_xz)
            if generate_joint_score:
                all_t_xz += list(t_xz)

    logging.info('Saving results')

    # Save results
    np.save(folder + '/theta0_' + filename + '.npy', all_theta0)
    np.save(folder + '/theta1_' + filename + '.npy', all_theta1)
    np.save(folder + '/x_' + filename + '.npy', all_x)
    np.save(folder + '/y_' + filename + '.npy', all_y)
    if generate_joint_ratio:
        np.save(folder + '/r_xz_' + filename + '.npy', all_r_xz)
    if generate_joint_score:
        np.save(folder + '/t_xz_' + filename + '.npy', all_t_xz)
Пример #2
0
def simulate(simulator_name,
             sample_label,
             theta0=None,
             theta1=None,
             draw_from=None,
             single_theta=False,
             grid_sampling=False,
             generate_joint_ratio=True,
             generate_joint_score=True,
             checkpoint=False,
             n_thetas=1000,
             n_samples_per_theta=1000,
             random_state=None,
             continue_after_exceptions=True):
    """
    Draws samples from a simulator and saves them as .npy files.

    The output files (theta0_*, theta1_*, x_*, y_* and, if requested, r_xz_*, t_xz_* and the checkpoint data) are
    written to base_dir + '/goldmine/data/samples/' + simulator_name.

    :param simulator_name: Specifies the simulator. Currently supported are 'galton' and 'epidemiology'.
    :param sample_label: str, label that becomes part of every output filename.
    :param theta0: None or str. If str, name of a .npy file in the simulator's thetas folder that holds the list of
                   theta0 values (the numerator of the likelihood ratio as well as the score reference point). If
                   None, simulator defaults are used for both theta0 and theta1; a given theta1 file is still loaded
                   but then replaced by the defaults.
    :param theta1: None or str. If str, name of a .npy file in the simulator's thetas folder that holds the list of
                   theta1 values (the denominator of the likelihood ratio), with the same shape as theta0.
    :param draw_from: list, either [0], [1], [0, 1], or None. None means [0, 1] if theta1 is available and [0]
                      otherwise. Each entry is stored as the label y of the corresponding samples.
    :param single_theta: bool, forwarded to the simulator's theta_defaults() when theta0 is None. If True, the
                         suffix '_singletheta' is appended to the output filenames.
    :param grid_sampling: bool, if True the default thetas are requested with random=False, otherwise with
                          random=True. Only relevant when theta0 is None.
    :param generate_joint_ratio: bool, whether to ask the simulator for the joint ratio (only if theta1 is given).
    :param generate_joint_score: bool, whether to ask the simulator for the joint score.
    :param checkpoint: bool, whether to use a checkpointed version of the simulator.
    :param n_thetas: int, number of thetas forwarded to the simulator's theta_defaults() when theta0 is None.
    :param n_samples_per_theta: Number of samples per combination of theta0 and theta1. It is split evenly (integer
                                division) between the entries of draw_from.
    :param random_state: Numpy random state, forwarded to the simulator.
    :param continue_after_exceptions: bool. If True, a SimulatorException is logged and the affected draw is skipped;
                                      if False, it is re-raised.
    :raises ValueError: if theta0 and theta1 have different shapes, or if draw_from has an unsupported value.
    """

    logging.info('Starting simulation')
    logging.info('  Simulator:                 %s', simulator_name)
    logging.info('  Checkpoint:                %s', checkpoint)
    logging.info('  Sample:                    %s', sample_label)
    logging.info('  theta0:                    %s',
                 'default' if theta0 is None else theta0)
    logging.info('  theta1:                    %s',
                 'default' if theta1 is None else theta1)
    if theta0 is None:
        if single_theta:
            logging.info('  theta sampling:            single theta')
        else:
            logging.info('  theta sampling:            %s',
                         ('grid' if grid_sampling else 'random'))
            logging.info('  Number of thetas:          %s', n_thetas)
    logging.info('  Samples / theta:           %s', n_samples_per_theta)
    logging.info('  Generate joint ratio:      %s', generate_joint_ratio)
    logging.info('  Generate joint score:      %s', generate_joint_score)
    logging.info('  Continue after exceptions: %s', continue_after_exceptions)

    # Check paths
    create_missing_folders(base_dir, simulator_name)

    simulator = create_simulator(simulator_name, checkpoint)

    # Load data (theta0 / theta1 are filenames at this point)
    if theta0 is not None:
        theta0 = np.load(base_dir + '/goldmine/data/thetas/' + simulator_name +
                         '/' + theta0)

    if theta1 is not None:
        theta1 = np.load(base_dir + '/goldmine/data/thetas/' + simulator_name +
                         '/' + theta1)

    # Filenames
    folder = base_dir + '/goldmine/data/samples/' + simulator_name
    filename = sample_label
    if single_theta:
        filename += '_singletheta'

    # Default thetas
    if theta0 is None:
        theta0, theta1 = simulator.theta_defaults(single_theta=single_theta,
                                                  n_thetas=n_thetas,
                                                  random=not grid_sampling)

    # Check thetas
    has_theta1 = (theta1 is not None)

    if has_theta1:
        if theta1.shape != theta0.shape:
            # Exceptions do not apply printf-style formatting to their
            # arguments, so the message is formatted explicitly.
            raise ValueError(
                'theta0 and theta1 have different shapes: %s, %s' %
                (theta0.shape, theta1.shape))
        if draw_from is None:
            draw_from = [0, 1]
        if draw_from not in [[0], [1], [0, 1]]:
            raise ValueError(
                'draw_from has value other than [0], [1], [0,1]: %s' %
                (draw_from, ))

    else:
        # Placeholder theta1 filled with NaN so that the loop below can zip
        # theta0 and theta1. np.nan is used because the np.NaN alias was
        # removed in NumPy 2.0.
        theta1 = np.empty_like(theta0)
        theta1[:] = np.nan

        if generate_joint_ratio:
            logging.warning(
                'Joint ratio requested, but theta1 not given -- will just generate joint score.'
            )
        generate_joint_ratio = False

        if draw_from is None:
            draw_from = [0]
        if draw_from not in [[0]]:
            raise ValueError(
                'No theta1, and draw_from has value other than [0]: %s' %
                (draw_from, ))

    n_samples_per_theta_and_draw = n_samples_per_theta // len(draw_from)

    # Checkpoint data only exists if the checkpointed simulator is used and at
    # least one joint quantity is requested (plain rvs() returns only x).
    has_checkpoint_data = checkpoint and (generate_joint_ratio
                                          or generate_joint_score)

    # Data to be generated
    all_theta0 = []
    all_theta1 = []
    all_x = []
    all_y = []
    all_r_xz = []
    all_t_xz = []
    all_z_checkpoints = []
    all_r_xz_checkpoints = []
    all_t_xz_checkpoints = []

    logging.info('Parameter points:')
    logging.info('  theta0 = %s', theta0)
    if has_theta1:
        logging.info('  theta1 = %s', theta1)

    # Loop over thetas and run simulator. theta0 and theta1 have the same
    # shape at this point, so len(theta0) is the number of zipped pairs.
    n_simulations = len(theta0)
    n_verbose = max(n_simulations // 100, 1)

    for i_simulation, (theta0_, theta1_) in enumerate(zip(theta0, theta1)):

        if (i_simulation + 1) % n_verbose == 0:
            logging.info(
                'Starting simulation for parameter setup %s / %s: theta0 = %s, theta1 = %s',
                i_simulation + 1, n_simulations, theta0_, theta1_)

        for y in draw_from:
            # NOTE(review): every draw uses theta=theta0_, also for y == 1;
            # y is only recorded as a label. Confirm that this is intended.

            t_xz = None
            r_xz = None
            z_checkpoints = None
            r_xz_checkpoints = None
            t_xz_checkpoints = None

            try:
                if checkpoint and generate_joint_ratio and generate_joint_score:
                    x, r_xz, t_xz, z_checkpoints, r_xz_checkpoints, t_xz_checkpoints = simulator.rvs_ratio_score(
                        theta=theta0_,
                        theta0=theta0_,
                        theta1=theta1_,
                        theta_score=theta0_,
                        n=n_samples_per_theta_and_draw,
                        random_state=random_state)
                elif checkpoint and generate_joint_ratio:
                    x, r_xz, z_checkpoints, r_xz_checkpoints = simulator.rvs_ratio(
                        theta=theta0_,
                        theta0=theta0_,
                        theta1=theta1_,
                        n=n_samples_per_theta_and_draw,
                        random_state=random_state)
                elif checkpoint and generate_joint_score:
                    x, t_xz, z_checkpoints, t_xz_checkpoints = simulator.rvs_score(
                        theta=theta0_,
                        theta_score=theta0_,
                        n=n_samples_per_theta_and_draw,
                        random_state=random_state)
                elif generate_joint_ratio and generate_joint_score:
                    x, r_xz, t_xz = simulator.rvs_ratio_score(
                        theta=theta0_,
                        theta0=theta0_,
                        theta1=theta1_,
                        theta_score=theta0_,
                        n=n_samples_per_theta_and_draw,
                        random_state=random_state)
                elif generate_joint_ratio:
                    x, r_xz = simulator.rvs_ratio(
                        theta=theta0_,
                        theta0=theta0_,
                        theta1=theta1_,
                        n=n_samples_per_theta_and_draw,
                        random_state=random_state)
                elif generate_joint_score:
                    x, t_xz = simulator.rvs_score(
                        theta=theta0_,
                        theta_score=theta0_,
                        n=n_samples_per_theta_and_draw,
                        random_state=random_state)
                else:
                    x = simulator.rvs(theta=theta0_,
                                      n=n_samples_per_theta_and_draw,
                                      random_state=random_state)

                # Only reached if the simulator call succeeded, so a failed
                # draw contributes nothing to any of the output lists.
                all_theta0 += [theta0_] * n_samples_per_theta_and_draw
                all_theta1 += [theta1_] * n_samples_per_theta_and_draw
                all_x += list(x)
                all_y += [y] * n_samples_per_theta_and_draw
                if generate_joint_ratio:
                    all_r_xz += list(r_xz)
                if generate_joint_score:
                    all_t_xz += list(t_xz)
                if has_checkpoint_data:
                    all_z_checkpoints += list(z_checkpoints)
                    if generate_joint_ratio:
                        all_r_xz_checkpoints += list(r_xz_checkpoints)
                    if generate_joint_score:
                        all_t_xz_checkpoints += list(t_xz_checkpoints)

            except SimulatorException as e:
                logging.warning('Simulator raised exception: %s', e)

                if continue_after_exceptions:
                    logging.info(
                        'Ignoring this parameter point and continuing with others.'
                    )
                else:
                    raise

    all_theta0 = np.array(all_theta0)
    all_theta1 = np.array(all_theta1)
    all_x = np.array(all_x)
    all_y = np.array(all_y)
    if generate_joint_ratio:
        all_r_xz = np.array(all_r_xz)
    if generate_joint_score:
        all_t_xz = np.array(all_t_xz)
    if has_checkpoint_data:
        all_z_checkpoints = np.array(all_z_checkpoints)
        if generate_joint_ratio:
            all_r_xz_checkpoints = np.array(all_r_xz_checkpoints)
        if generate_joint_score:
            all_t_xz_checkpoints = np.array(all_t_xz_checkpoints)

    # Debug output for the first few events. Quantities that were not
    # generated are reported as None; indexing their (empty) placeholder
    # lists would raise when only one of ratio / score is generated.
    if has_checkpoint_data:
        for i_event in range(min(10, len(all_z_checkpoints))):
            logging.debug('Checkpoint information for event %s:', i_event + 1)
            for i_checkpoint in range(all_z_checkpoints.shape[1]):
                logging.debug(
                    '  CP %s: z = %s, r = %s, t = %s', i_checkpoint + 1,
                    all_z_checkpoints[i_event, i_checkpoint],
                    (all_r_xz_checkpoints[i_event, i_checkpoint]
                     if generate_joint_ratio else None),
                    (all_t_xz_checkpoints[i_event, i_checkpoint]
                     if generate_joint_score else None))
            logging.debug(
                'Sum:   r = %s, t = %s',
                (np.prod(all_r_xz_checkpoints[i_event])
                 if generate_joint_ratio else None),
                (np.sum(all_t_xz_checkpoints[i_event], axis=0)
                 if generate_joint_score else None))
            logging.debug(
                'Total: r = %s, t = %s',
                all_r_xz[i_event] if generate_joint_ratio else None,
                all_t_xz[i_event] if generate_joint_score else None)

    logging.info('Saving results')

    # Save results
    np.save(folder + '/theta0_' + filename + '.npy', all_theta0)
    np.save(folder + '/theta1_' + filename + '.npy', all_theta1)
    np.save(folder + '/x_' + filename + '.npy', all_x)
    np.save(folder + '/y_' + filename + '.npy', all_y)
    if generate_joint_ratio:
        np.save(folder + '/r_xz_' + filename + '.npy', all_r_xz)
    if generate_joint_score:
        np.save(folder + '/t_xz_' + filename + '.npy', all_t_xz)

    # Checkpoint files are only written when checkpoint data was collected,
    # mirroring the array conversion above (no empty placeholder files).
    if has_checkpoint_data:
        np.save(folder + '/z_checkpoints_' + filename + '.npy',
                all_z_checkpoints)
        if generate_joint_ratio:
            np.save(folder + '/r_xz_checkpoints_' + filename + '.npy',
                    all_r_xz_checkpoints)
        if generate_joint_score:
            np.save(folder + '/t_xz_checkpoints_' + filename + '.npy',
                    all_t_xz_checkpoints)
Пример #3
0
def test(simulator_name,
         inference_name,
         checkpoint=False,
         run=0,
         alpha=1.,
         model_label='model',
         trained_on_single_theta=False,
         training_sample_size=None,
         test_sample='test',
         evaluate_densities_on_original_theta=True,
         evaluate_densities_on_grid=False,
         evaluate_ratios_on_grid=False,
         evaluate_score_on_original_theta=False,
         theta_grid=None,
         theta1_grid=None,
         generate_samples=False,
         discretize_generated_samples=False,
         grid_n_samples=1000,
         classify_surrogate_vs_true_samples=False):
    """
    Main evaluation function: loads a trained inference model and evaluates it on stored test samples.

    Test samples are read from base_dir + '/goldmine/data/samples/' + simulator_name, the model from the
    corresponding 'models' folder, and all results are written as .npy files to the corresponding 'results' folder.

    :param simulator_name: str, name of the simulator (used for folders and to create the simulator when needed).
    :param inference_name: str, name of the inference method (used for folders and to create the inference object).
    :param checkpoint: bool, forwarded to the simulator / inference factories; also appends '_checkpoint' to the
                       model and result folders.
    :param run: None or number. A non-zero run adds '_run<run>' to the model and result filenames.
    :param alpha: only logged by this function.
    :param model_label: str, base of the model filename; added to the result filenames unless it is 'model'.
    :param trained_on_single_theta: bool, selects the '_singletheta' model and tags the result filenames.
    :param training_sample_size: None or int, adds a '_trainingsamplesize_<n>' tag to model and result filenames.
    :param test_sample: str, label of the test sample files to load.
    :param evaluate_densities_on_original_theta: bool, evaluate log densities on both test samples.
    :param evaluate_densities_on_grid: bool, evaluate log densities of the single-theta sample on a theta grid.
    :param evaluate_ratios_on_grid: bool, evaluate log ratios of the single-theta sample on a theta grid.
    :param evaluate_score_on_original_theta: bool, evaluate the score on both test samples.
    :param theta_grid: None, int, or sequence of per-parameter grid values. None / int request the simulator's
                       default grid (the int is passed as n_points_per_dim).
    :param theta1_grid: None or denominator theta for the ratio evaluation. None uses the simulator default.
    :param generate_samples: bool, draw samples from the learned density and save them.
    :param discretize_generated_samples: bool, discretize the generated samples with the simulator's scheme.
    :param grid_n_samples: int, number of single-theta samples used in the grid evaluations.
    :param classify_surrogate_vs_true_samples: bool, discriminate saved surrogate samples from simulator samples.
    """

    any_grid_evaluation = evaluate_densities_on_grid or evaluate_ratios_on_grid
    training_size_description = ('maximal' if training_sample_size is None
                                 else training_sample_size)

    # Report the configuration
    logging.info('Starting evaluation')
    logging.info('  Simulator:                        %s', simulator_name)
    logging.info('  Inference method:                 %s', inference_name)
    logging.info('  Checkpoint:                       %s', checkpoint)
    logging.info('  ML model name:                    %s', model_label)
    logging.info('  Run number:                       %s', run)
    logging.info('  Test sample:                      %s', test_sample)
    logging.info('  alpha:                            %s', alpha)
    logging.info('  Single-theta tr. sample:          %s',
                 trained_on_single_theta)
    logging.info('  Training sample size:             %s',
                 training_size_description)
    logging.info('  Evaluate log p on original theta: %s',
                 evaluate_densities_on_original_theta)
    logging.info('  Evaluate log p on grid:           %s',
                 evaluate_densities_on_grid)
    logging.info('  Evaluate ratios on grid:          %s',
                 evaluate_ratios_on_grid)
    if any_grid_evaluation:
        if theta_grid is None:
            logging.info(
                '  Theta grid:                       default grid with default resolution'
            )
        elif isinstance(theta_grid, int):
            logging.info(
                '  Theta grid:                       default grid with %s points per dimension',
                theta_grid)
        else:
            # First component carries the label, the rest are aligned below it
            logging.info('  Theta grid:                       %s',
                         theta_grid[0])
            for further_component in theta_grid[1:]:
                logging.info('                                    %s',
                             further_component)
    if evaluate_ratios_on_grid:
        if theta1_grid is None:
            logging.info('  Denominator theta:                default')
        else:
            logging.info('  Denominator theta:                %s', theta1_grid)
    logging.info('  Grid x points saved:              %s', grid_n_samples)
    logging.info('  Generate samples:                 %s', generate_samples)
    logging.info('  Discretize samples                %s',
                 discretize_generated_samples)
    logging.info('  Classify samples vs true:         %s',
                 classify_surrogate_vs_true_samples)

    # Make sure the output folders exist
    create_missing_folders(base_dir, simulator_name, inference_name)

    # Folders
    data_root = base_dir + '/goldmine/data'
    method_subfolder = simulator_name + '/' + inference_name
    folder_suffix = '_checkpoint' if checkpoint else ''
    sample_folder = data_root + '/samples/' + simulator_name
    model_folder = data_root + '/models/' + method_subfolder + folder_suffix
    result_folder = data_root + '/results/' + method_subfolder + folder_suffix

    # Filenames: tags are appended in the order label, single theta,
    # training sample size, run number
    model_filename = model_label
    result_filename = '' if model_label == 'model' else '_' + model_label
    if trained_on_single_theta:
        model_filename += '_singletheta'
        result_filename += '_trainedonsingletheta'
    if training_sample_size is not None:
        size_tag = '_trainingsamplesize_' + str(training_sample_size)
        model_filename += size_tag
        result_filename += size_tag
    test_filename = test_sample

    run_appendix = ''
    if run is not None and int(run) != 0:
        run_appendix = '_run' + str(int(run))
    model_filename += run_appendix
    result_filename += run_appendix

    def _sample_path(kind, suffix=''):
        # Path of a stored test-sample array, e.g. kind='x'
        return sample_folder + '/' + kind + '_' + test_filename + suffix + '.npy'

    def _result_path(stem):
        # Path of a result file; stem is everything before the result tags
        return result_folder + '/' + stem + result_filename + '.npy'

    # Theta grid: fall back to the simulator default when no explicit grid
    # (None or just a resolution) was given
    simulator = None
    wants_default_grid = theta_grid is None or isinstance(theta_grid, int)
    if any_grid_evaluation and wants_default_grid:
        simulator = create_simulator(simulator_name, checkpoint=checkpoint)
        theta_grid = simulator.theta_grid_default(n_points_per_dim=theta_grid)

        logging.info(
            "Created theta grid with %s parameters and %s points each",
            theta_grid.shape[0], theta_grid.shape[1])

    # Default denominator theta: first entry of the simulator's single-theta
    # default theta1
    if evaluate_ratios_on_grid and theta1_grid is None:
        if simulator is None:
            simulator = create_simulator(simulator_name, checkpoint=checkpoint)
        _, default_theta1 = simulator.theta_defaults(single_theta=True)
        theta1_grid = default_theta1[0]

    # NOTE: the training sample is not loaded or evaluated by this function.

    # Load test data (many thetas)
    logging.info('Loading many-theta test sample')
    thetas_test = load_and_check(_sample_path('theta0'))
    xs_test = load_and_check(_sample_path('x'))

    n_samples, n_observables = xs_test.shape[0], xs_test.shape[1]
    n_parameters = thetas_test.shape[1]
    assert thetas_test.shape[0] == n_samples

    logging.info('Found %s samples with %s parameters and %s observables',
                 n_samples, n_parameters, n_observables)

    # Load test data (single theta)
    logging.info('Loading single-theta test sample')
    thetas_singletheta = load_and_check(_sample_path('theta0', '_singletheta'))
    xs_singletheta = load_and_check(_sample_path('x', '_singletheta'))

    n_samples_singletheta = xs_singletheta.shape[0]
    n_observables_singletheta = xs_singletheta.shape[1]
    n_parameters_singletheta = thetas_singletheta.shape[1]
    assert thetas_singletheta.shape[0] == n_samples_singletheta

    logging.info('Found %s samples with %s parameters and %s observables',
                 n_samples_singletheta, n_parameters_singletheta,
                 n_observables_singletheta)

    # Load inference model
    model_path = model_folder + '/' + model_filename
    logging.info('Loading trained model from %s', model_path + '.*')
    inference = create_inference(inference_name,
                                 checkpoint=checkpoint,
                                 filename=model_path)

    # Densities on the test samples, evaluated at the thetas they belong to
    if evaluate_densities_on_original_theta:
        try:
            logging.info('Estimating densities on many-theta test sample')
            log_p_hat = inference.predict_density(thetas_test,
                                                  xs_test,
                                                  log=True)
            np.save(_result_path('log_p_hat_' + test_filename), log_p_hat)

            logging.info(
                'Estimating densities on single-theta test sample, testing original theta'
            )
            log_p_hat = inference.predict_density(thetas_singletheta,
                                                  xs_singletheta,
                                                  log=True)
            np.save(
                _result_path('log_p_hat_' + test_filename + '_singletheta'),
                log_p_hat)

        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support density evaluation',
                inference_name)

    # Densities of the single-theta sample, evaluated on every grid point
    if evaluate_densities_on_grid:
        try:
            logging.info(
                'Estimating densities on single-theta test sample, testing theta grid'
            )

            # Cartesian product of the per-parameter grids, one row per point
            mesh = np.meshgrid(*theta_grid, indexing='ij')
            theta_grid_points = np.array(mesh).reshape(
                (len(theta_grid), -1)).T

            density_per_point = []
            for grid_point in theta_grid_points:
                logging.debug('Grid point %s', grid_point)
                point_log_p = inference.predict_density(
                    grid_point, xs_singletheta[:grid_n_samples], log=True)
                density_per_point.append(point_log_p)

            np.save(result_folder + '/theta_grid.npy', theta_grid_points)
            log_p_hat_grid = np.asarray(density_per_point)
            np.save(
                _result_path('log_p_hat_' + test_filename +
                             '_singletheta_evaluated_on_grid_'),
                log_p_hat_grid)

        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support density evaluation',
                inference_name)

    # Ratios of the single-theta sample, grid point vs. denominator theta
    if evaluate_ratios_on_grid:
        try:
            logging.info(
                'Estimating ratios on single-theta test sample, testing theta0 grid'
            )

            # Cartesian product of the per-parameter grids, one row per point
            mesh = np.meshgrid(*theta_grid, indexing='ij')
            theta_grid_points = np.array(mesh).reshape(
                (len(theta_grid), -1)).T

            ratio_per_point = []
            for grid_point in theta_grid_points:
                logging.debug('Grid point %s vs %s', grid_point, theta1_grid)
                point_log_r = inference.predict_ratio(
                    grid_point,
                    theta1_grid,
                    xs_singletheta[:grid_n_samples],
                    log=True)
                ratio_per_point.append(point_log_r)

            np.save(result_folder + '/theta_grid.npy', theta_grid_points)
            log_r_hat_grid = np.asarray(ratio_per_point)
            np.save(
                _result_path('log_r_hat_' + test_filename +
                             '_singletheta_evaluated_on_grid_'),
                log_r_hat_grid)

        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support ratio evaluation',
                inference_name)

    # Score on the test samples, evaluated at the thetas they belong to
    if evaluate_score_on_original_theta:
        try:
            logging.info('Estimating score on many-theta test sample')
            t_hat = inference.predict_score(thetas_test, xs_test)
            np.save(_result_path('t_hat_' + test_filename), t_hat)

            logging.info(
                'Estimating score on single-theta test sample, testing original theta'
            )
            t_hat = inference.predict_score(thetas_singletheta, xs_singletheta)
            np.save(_result_path('t_hat_' + test_filename + '_singletheta'),
                    t_hat)

        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support score evaluation',
                inference_name)

    # Generate samples
    if generate_samples:
        logging.info('Generating samples according to learned density')
        try:
            xs_surrogate = inference.generate_samples(thetas_singletheta)

            if discretize_generated_samples:
                discretization_simulator = create_simulator(simulator_name)
                discretization = discretization_simulator.get_discretization()

                logging.info('Discretizing data with scheme %s',
                             discretization)
                xs_surrogate = discretize(xs_surrogate, discretization)

            np.save(_result_path('samples_from_p_hat'), xs_surrogate)
        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support sample generation',
                inference_name)

    # Train classifier to distinguish samples from surrogate from samples from simulator
    if classify_surrogate_vs_true_samples:
        logging.info(
            'Training classifier to discriminate surrogate samples from simulator samples'
        )
        # The surrogate samples are read back from disk rather than reused
        xs_surrogate = load_and_check(_result_path('samples_from_p_hat'))
        roc_auc, tpr, fpr = discriminate_samples(xs_singletheta, xs_surrogate)

        classifier_outputs = (
            ('roc_auc_surrogate_vs_simulator', roc_auc),
            ('fpr_surrogate_vs_simulator', fpr),
            ('tpr_surrogate_vs_simulator', tpr),
        )
        for output_stem, output_value in classifier_outputs:
            np.save(_result_path(output_stem), [output_value])
Пример #4
0
def test(simulator_name,
         inference_name,
         run=0,
         alpha=1.,
         trained_on_single_theta=False,
         training_sample_size=None,
         evaluate_densities=True,
         generate_samples=True,
         discretize_generated_samples=True,
         classify_surrogate_vs_true_samples=True):
    """
    Main evaluation function: loads a trained inference model and evaluates it on the stored samples.

    Depending on the flags, this estimates log densities on the train and test samples, draws samples from the
    learned density, and trains a classifier to tell these surrogate samples apart from simulator samples. All
    results are written as .npy files to the result folder of the given simulator / inference method.

    :param simulator_name: Name of the simulator; selects the sample, model, and result folders.
    :param inference_name: Name of the inference method; passed to create_inference and used in folder names.
    :param run: Run number. None or 0 adds nothing to the filenames, any other value appends '_run<int(run)>'.
    :param alpha: Only logged by this function.
    :param trained_on_single_theta: bool, if True the model file with the '_singletheta' suffix is loaded and results
                                    are saved with the '_trainedonsingletheta' suffix.
    :param training_sample_size: None or a value that is appended (as '_trainingsamplesize_<value>') to the model and
                                 result filenames.
    :param evaluate_densities: bool, whether to estimate log densities on the four samples.
    :param generate_samples: bool, whether to generate samples from the learned density at the single-theta test
                             parameter points.
    :param discretize_generated_samples: bool, whether generated samples are discretized with the simulator's
                                         discretization scheme before being saved.
    :param classify_surrogate_vs_true_samples: bool, whether to train a classifier between the saved surrogate samples
                                               and the single-theta test sample.
    """

    logging.info('Starting evaluation')
    logging.info('  Simulator:                %s', simulator_name)
    logging.info('  Inference method:         %s', inference_name)
    logging.info('  Run number:               %s', run)
    logging.info('  alpha:                    %s', alpha)
    logging.info('  Single-theta tr. sample:  %s', trained_on_single_theta)
    logging.info(
        '  Training sample size:     %s',
        'maximal' if training_sample_size is None else training_sample_size)
    logging.info('  Evaluate densities:       %s', evaluate_densities)
    logging.info('  Generate samples:         %s', generate_samples)
    logging.info('  Discretize samples        %s',
                 discretize_generated_samples)
    logging.info('  Classify samples vs true: %s',
                 classify_surrogate_vs_true_samples)

    # Check paths
    create_missing_folders(base_dir, simulator_name, inference_name)

    # Folders and filenames
    sample_folder = base_dir + '/goldmine/data/samples/' + simulator_name
    model_folder = base_dir + '/goldmine/data/models/' + simulator_name + '/' + inference_name
    result_folder = base_dir + '/goldmine/data/results/' + simulator_name + '/' + inference_name

    model_filename = ''
    result_filename = ''
    if trained_on_single_theta:
        model_filename += '_singletheta'
        result_filename += '_trainedonsingletheta'
    if training_sample_size is not None:
        size_appendix = '_trainingsamplesize_' + str(training_sample_size)
        model_filename += size_appendix
        result_filename += size_appendix

    # Run 0 (or no run number) is the default run and carries no suffix
    if run is None or int(run) == 0:
        run_appendix = ''
    else:
        run_appendix = '_run' + str(int(run))

    model_filename += run_appendix
    result_filename += run_appendix

    def _load_sample(log_message, label):
        # Loads the theta0 and x arrays of one sample and checks that they have the same number of rows
        logging.info(log_message)
        thetas = load_and_check(sample_folder + '/theta0_' + label + '.npy')
        xs = load_and_check(sample_folder + '/x_' + label + '.npy')

        n_rows = xs.shape[0]
        n_obs = xs.shape[1]
        n_params = thetas.shape[1]
        assert thetas.shape[0] == n_rows

        logging.info('Found %s samples with %s parameters and %s observables',
                     n_rows, n_params, n_obs)
        return thetas, xs

    # Load train and test data, each as many-theta and single-theta sample
    thetas_train, xs_train = _load_sample(
        'Loading many-theta  train sample', 'train')
    thetas_train_singletheta, xs_train_singletheta = _load_sample(
        'Loading single-theta train sample', 'train_singletheta')
    thetas_test, xs_test = _load_sample(
        'Loading many-theta test sample', 'test')
    thetas_singletheta, xs_singletheta = _load_sample(
        'Loading single-theta test sample', 'test_singletheta')

    # Load inference model
    logging.info('Loading trained model from %s',
                 model_folder + '/model' + model_filename + '.*')
    inference = create_inference(inference_name,
                                 filename=model_folder + '/model' +
                                 model_filename)

    # Evaluate density on all samples
    if evaluate_densities:
        density_jobs = (
            ('Estimating densities on many-theta train sample', 'train',
             thetas_train, xs_train),
            ('Estimating densities on single-theta train sample',
             'train_singletheta', thetas_train_singletheta,
             xs_train_singletheta),
            ('Estimating densities on many-theta test sample', 'test',
             thetas_test, xs_test),
            ('Estimating densities on single-theta test sample',
             'test_singletheta', thetas_singletheta, xs_singletheta),
        )
        try:
            # A NotImplementedError on any sample skips the remaining ones as well
            for log_message, label, thetas, xs in density_jobs:
                logging.info(log_message)
                log_p_hat = inference.predict_density(thetas, xs, log=True)
                np.save(
                    result_folder + '/log_p_hat_' + label + result_filename +
                    '.npy', log_p_hat)

        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support density evaluation',
                inference_name)

    # TODO: Implement ratio estimation

    # Generate samples
    if generate_samples:
        logging.info('Generating samples according to learned density')
        try:
            xs_surrogate = inference.generate_samples(thetas_singletheta)

            if discretize_generated_samples:
                discretization = create_simulator(
                    simulator_name).get_discretization()

                logging.info('Discretizing data with scheme %s',
                             discretization)
                xs_surrogate = discretize(xs_surrogate, discretization)

            np.save(
                result_folder + '/samples_from_p_hat' + result_filename +
                '.npy', xs_surrogate)
        except NotImplementedError:
            logging.warning(
                'Inference method %s does not support sample generation',
                inference_name)

    # Train classifier to distinguish samples from surrogate from samples from simulator
    if classify_surrogate_vs_true_samples:
        logging.info(
            'Training classifier to discriminate surrogate samples from simulator samples'
        )
        # The surrogate samples are read back from disk, so this step also works on samples saved by an earlier call
        xs_surrogate = load_and_check(result_folder + '/samples_from_p_hat' +
                                      result_filename + '.npy')
        # The surrogate samples are generated at thetas_singletheta, so they have to be compared to the simulator
        # samples at the same parameter points (xs_singletheta), not to the many-theta test sample
        roc_auc, tpr, fpr = discriminate_samples(xs_singletheta, xs_surrogate)
        np.save(
            result_folder + '/roc_auc_surrogate_vs_simulator' +
            result_filename + '.npy', [roc_auc])
        np.save(
            result_folder + '/fpr_surrogate_vs_simulator' + result_filename +
            '.npy', [fpr])
        np.save(
            result_folder + '/tpr_surrogate_vs_simulator' + result_filename +
            '.npy', [tpr])
Пример #5
0
def train(simulator_name,
          inference_name,
          checkpoint=False,
          model_label='model',
          run=0,
          n_components=1,
          n_mades=3,
          hidden_layers=2,
          units_per_layer=20,
          batch_norm=False,
          activation='relu',
          n_bins_theta='auto',
          histogram_observables='all',
          n_bins_x='auto',
          separate_1d_x_histos=False,
          fill_empty_bins=False,
          alpha=0.0001,
          beta=0.0001,
          gamma=1.,
          training_sample='train',
          single_theta=False,
          training_sample_size=None,
          n_epochs=20,
          compensate_sample_size=False,
          batch_size=128,
          trainer='adam',
          initial_lr=0.001,
          final_lr=0.0001,
          validation_split=0.2,
          early_stopping=True,
          pretrain=False,
          pre_alpha=0.0001,
          pre_beta=0.0001,
          pre_gamma=1.,
          sequential_checkpoint_training=False):
    """
    Main training function: loads a training sample, builds an inference model, fits it, and saves it.

    :param simulator_name: Name of the simulator; selects the sample, model, and result folders.
    :param inference_name: Name of the inference method; passed to create_inference and used in folder names.
    :param checkpoint: bool. If True, the checkpoint data (z_checkpoints and, where the method requires them, the
                       joint ratio / joint score at the checkpoints) is loaded and passed to the fit, and
                       '_checkpoint' is appended to the model and result folders.
    :param model_label: Base name of the saved model and learning curve files.
    :param run: Run number. None or 0 adds nothing to the filenames, any other value appends '_run<int(run)>'.
    :param n_components: Passed to create_inference.
    :param n_mades: Passed to create_inference.
    :param hidden_layers: Passed to create_inference as both n_made_hidden_layers and n_hidden_layers.
    :param units_per_layer: Passed to create_inference as both n_made_units_per_layer and n_units_per_layer.
    :param batch_norm: Passed to create_inference.
    :param activation: Passed to create_inference.
    :param n_bins_theta: Passed to create_inference. Overwritten with 1 if single_theta is True.
    :param histogram_observables: Passed to create_inference as observables. None or an empty value is replaced by
                                  'all'.
    :param n_bins_x: Passed to create_inference.
    :param separate_1d_x_histos: Passed to create_inference.
    :param fill_empty_bins: Passed to inference.fit.
    :param alpha: Passed to the main inference.fit call.
    :param beta: Passed to the main inference.fit call.
    :param gamma: Passed to the main inference.fit call.
    :param training_sample: Label of the sample files to load ('theta0_<label>.npy', 'x_<label>.npy', ...).
    :param single_theta: bool. If True, '_singletheta' is appended to the sample label and the output filename.
    :param training_sample_size: None or int. If smaller than the number of loaded samples, the data is shuffled and
                                 truncated to this many samples.
    :param n_epochs: Number of epochs passed to inference.fit (before compensation, see compensate_sample_size).
    :param compensate_sample_size: bool. If True and the sample is truncated, n_epochs is scaled by
                                   (number of loaded samples) / training_sample_size.
    :param batch_size: Passed to inference.fit.
    :param trainer: Passed to inference.fit.
    :param initial_lr: Passed to inference.fit as initial_learning_rate.
    :param final_lr: Passed to inference.fit as final_learning_rate.
    :param validation_split: Passed to inference.fit.
    :param early_stopping: Passed to inference.fit.
    :param pretrain: bool, whether to run an additional fit with the pre_* loss weights before the main fit.
    :param pre_alpha: alpha passed to the pre-training fit.
    :param pre_beta: beta passed to the pre-training fit.
    :param pre_gamma: gamma passed to the pre-training fit.
    :param sequential_checkpoint_training: Only used when checkpoint is True: together with pretrain, it is passed as
                                           freeze_flow to the pre-training fit and as freeze_score_model to the main
                                           fit.
    """

    if single_theta:
        n_bins_theta = 1
    if histogram_observables is None or len(histogram_observables) == 0:
        histogram_observables = 'all'

    logging.info('Starting training')
    logging.info('  Simulator:             %s', simulator_name)
    logging.info('  Inference method:      %s', inference_name)
    logging.info('  Checkpoint:            %s', checkpoint)
    logging.info('  ML model name:         %s', model_label)
    logging.info('  Run number:            %s', run)
    logging.info('  Mixture components:    %s', n_components)
    logging.info('  MADEs:                 %s', n_mades)
    logging.info('  Hidden layers:         %s', hidden_layers)
    logging.info('  Units / layer:         %s', units_per_layer)
    logging.info('  Batch norm:            %s', batch_norm)
    logging.info('  Activation function:   %s', activation)
    logging.info('  Histogram theta bins:  %s', n_bins_theta)
    logging.info('  Histogram observables: %s', histogram_observables)
    # Each label is paired with the setting it names (these two values used to be logged under each other's label)
    logging.info('  Histogram x bins:      %s', n_bins_x)
    logging.info('  1d x histograms:       %s', separate_1d_x_histos)
    logging.info('  Fill empty bins:       %s', fill_empty_bins)
    logging.info('  SCANDAL/RASCAL alpha:  %s', alpha)
    logging.info('  RASCANDAL beta:        %s', beta)
    logging.info('  CV SCANDAL gamma:      %s', gamma)
    logging.info('  Training sample name:  %s', training_sample)
    logging.info('  Train on single theta: %s', single_theta)
    logging.info(
        '  Training sample size:  %s',
        'maximal' if training_sample_size is None else training_sample_size)
    if compensate_sample_size and training_sample_size is not None:
        logging.info(
            '  Epochs:                %s (plus compensation for decreased sample size)',
            n_epochs)
    else:
        logging.info('  Epochs:                %s', n_epochs)
    logging.info('  Batch size:            %s', batch_size)
    logging.info('  Optimizer:             %s', trainer)
    logging.info('  Learning rate:         %s initially, decaying to %s',
                 initial_lr, final_lr)
    logging.info('  Validation split:      %s', validation_split)
    logging.info('  Early stopping:        %s', early_stopping)

    # Check paths
    create_missing_folders(
        base_dir, simulator_name,
        inference_name + '_checkpoint' if checkpoint else inference_name)

    # Folders and filenames
    sample_folder = base_dir + '/goldmine/data/samples/' + simulator_name
    model_folder = base_dir + '/goldmine/data/models/' + simulator_name + '/' + inference_name
    result_folder = base_dir + '/goldmine/data/results/' + simulator_name + '/' + inference_name

    sample_filename = training_sample
    output_filename = model_label
    if checkpoint:
        model_folder += '_checkpoint'
        result_folder += '_checkpoint'
    if single_theta:
        output_filename += '_singletheta'
        sample_filename += '_singletheta'
    if training_sample_size is not None:
        output_filename += '_trainingsamplesize_' + str(training_sample_size)

    # Run 0 (or no run number) is the default run and carries no suffix
    if run is not None and int(run) != 0:
        output_filename += '_run' + str(int(run))

    # Load training data and creating model
    logging.info('Loading %s training data from %s', simulator_name,
                 sample_folder + '/*_' + sample_filename + '.npy')
    thetas = load_and_check(sample_folder + '/theta0_' + sample_filename +
                            '.npy')
    xs = load_and_check(sample_folder + '/x_' + sample_filename + '.npy')
    z_checkpoints = None
    if checkpoint:
        z_checkpoints = load_and_check(sample_folder + '/z_checkpoints_' +
                                       sample_filename + '.npy')

    n_samples = thetas.shape[0]
    n_parameters = thetas.shape[1]
    n_observables = xs.shape[1]
    n_latent = None
    n_checkpoints = None
    if checkpoint:
        n_checkpoints = z_checkpoints.shape[1]
        n_latent = z_checkpoints.shape[2]
        logging.info(
            'Found %s samples with %s parameters, %s observables, and %s  checkpoints with %s latent variables',
            n_samples, n_parameters, n_observables, n_checkpoints, n_latent)
    else:
        logging.info('Found %s samples with %s parameters and %s observables',
                     n_samples, n_parameters, n_observables)

    inference = create_inference(inference_name,
                                 checkpoint=checkpoint,
                                 n_mades=n_mades,
                                 n_components=n_components,
                                 n_made_hidden_layers=hidden_layers,
                                 n_made_units_per_layer=units_per_layer,
                                 n_hidden_layers=hidden_layers,
                                 n_units_per_layer=units_per_layer,
                                 batch_norm=batch_norm,
                                 activation=activation,
                                 n_parameters=n_parameters,
                                 n_observables=n_observables,
                                 n_latent=n_latent,
                                 n_bins_theta=n_bins_theta,
                                 n_bins_x=n_bins_x,
                                 separate_1d_x_histos=separate_1d_x_histos,
                                 observables=histogram_observables)

    # Load more data: only what the inference method asks for
    if inference.requires_class_label():
        ys = load_and_check(sample_folder + '/y_' + sample_filename + '.npy')
    else:
        ys = None

    if inference.requires_joint_ratio():
        r_xz = load_and_check(sample_folder + '/r_xz_' + sample_filename +
                              '.npy')
        theta1 = load_and_check(sample_folder + '/theta1_' + sample_filename +
                                '.npy')
        # For now, we just want one constant theta1. Might be changed in later versions
        if len(theta1.shape) > 1:
            theta1 = theta1[0]
        assert theta1.shape == thetas[
            0].shape, 'Shape mismatch between theta0 and theta1'
    else:
        r_xz = None
        theta1 = None

    if inference.requires_joint_score():
        t_xz = load_and_check(sample_folder + '/t_xz_' + sample_filename +
                              '.npy')
    else:
        t_xz = None

    r_xz_checkpoints = None
    t_xz_checkpoints = None
    if checkpoint:
        if inference.requires_joint_ratio():
            r_xz_checkpoints = load_and_check(sample_folder +
                                              '/r_xz_checkpoints_' +
                                              sample_filename + '.npy')
        if inference.requires_joint_score():
            t_xz_checkpoints = load_and_check(sample_folder +
                                              '/t_xz_checkpoints_' +
                                              sample_filename + '.npy')

    # Restricted training sample size
    if training_sample_size is not None and training_sample_size < n_samples:
        thetas, xs, ys, r_xz, t_xz, z_checkpoints, r_xz_checkpoints, t_xz_checkpoints = shuffle(
            thetas, xs, ys, r_xz, t_xz, z_checkpoints, r_xz_checkpoints,
            t_xz_checkpoints)

        def _head(array):
            # Keeps the first training_sample_size rows; arrays that were not loaded stay None
            return None if array is None else array[:training_sample_size]

        thetas = _head(thetas)
        xs = _head(xs)
        ys = _head(ys)
        r_xz = _head(r_xz)
        t_xz = _head(t_xz)
        z_checkpoints = _head(z_checkpoints)
        r_xz_checkpoints = _head(r_xz_checkpoints)
        t_xz_checkpoints = _head(t_xz_checkpoints)

        logging.info('Only using %s of %s training samples',
                     training_sample_size, n_samples)

        if compensate_sample_size:
            # Scale the number of epochs by the factor by which the sample shrank
            n_epochs_compensated = int(
                round(n_epochs * n_samples / training_sample_size, 0))
            logging.info(
                'Compensating by increasing number of epochs from %s to %s',
                n_epochs, n_epochs_compensated)
            n_epochs = n_epochs_compensated

    # Keyword arguments shared by the pre-training and the main fit (built after n_epochs is final)
    shared_fit_kwargs = dict(theta1=theta1,
                             n_epochs=n_epochs,
                             batch_size=batch_size,
                             trainer=trainer,
                             initial_learning_rate=initial_lr,
                             final_learning_rate=final_lr,
                             validation_split=validation_split,
                             early_stopping=early_stopping,
                             fill_empty_bins=fill_empty_bins)
    if checkpoint:
        shared_fit_kwargs.update(z_checkpoints=z_checkpoints,
                                 r_xz_checkpoints=r_xz_checkpoints,
                                 t_xz_checkpoints=t_xz_checkpoints)

    # Pretrain model
    if pretrain:
        logging.info('Pre-training model %s on %s data', inference_name,
                     simulator_name)

        # Pre-training uses its own loss weights and does not write learning curves
        pretrain_kwargs = dict(shared_fit_kwargs,
                               alpha=pre_alpha,
                               beta=pre_beta,
                               gamma=pre_gamma,
                               learning_curve_folder=None,
                               learning_curve_filename=None)
        if checkpoint:
            pretrain_kwargs['freeze_flow'] = (pretrain and
                                              sequential_checkpoint_training)

        inference.fit(thetas, xs, ys, r_xz, t_xz, **pretrain_kwargs)
    else:
        logging.info('No pre-training')

    # Train model
    logging.info('Training model %s on %s data', inference_name,
                 simulator_name)

    main_kwargs = dict(shared_fit_kwargs,
                       alpha=alpha,
                       beta=beta,
                       gamma=gamma,
                       learning_curve_folder=result_folder,
                       learning_curve_filename=output_filename)
    if checkpoint:
        main_kwargs['freeze_score_model'] = (pretrain and
                                             sequential_checkpoint_training)

    inference.fit(thetas, xs, ys, r_xz, t_xz, **main_kwargs)

    # Save models
    logging.info('Saving learned model to %s',
                 model_folder + '/' + output_filename + '.*')
    inference.save(model_folder + '/' + output_filename)
Пример #6
0
def train(simulator_name,
          inference_name,
          run=0,
          n_mades=3,
          n_made_hidden_layers=2,
          n_made_units_per_layer=20,
          batch_norm=False,
          activation='relu',
          n_bins_theta='auto',
          histogram_observables='all',
          n_bins_x='auto',
          separate_1d_x_histos=False,
          fill_empty_bins=False,
          alpha=1.,
          single_theta=False,
          training_sample_size=None,
          n_epochs=20,
          compensate_sample_size=False,
          batch_size=64,
          initial_lr=0.001,
          final_lr=0.0001,
          early_stopping=True):
    """
    Main training function: loads the training sample, builds an inference model, fits it, and saves it.

    :param simulator_name: Name of the simulator; selects the sample, model, and result folders.
    :param inference_name: Name of the inference method; passed to create_inference and used in folder names.
    :param run: Run number. None or 0 adds nothing to the filenames, any other value appends '_run<int(run)>'.
    :param n_mades: Passed to create_inference.
    :param n_made_hidden_layers: Passed to create_inference.
    :param n_made_units_per_layer: Passed to create_inference.
    :param batch_norm: Passed to create_inference.
    :param activation: Passed to create_inference.
    :param n_bins_theta: Passed to create_inference. Overwritten with 1 if single_theta is True.
    :param histogram_observables: Passed to create_inference as observables. None or an empty value is replaced by
                                  'all'.
    :param n_bins_x: Passed to create_inference.
    :param separate_1d_x_histos: Passed to create_inference.
    :param fill_empty_bins: Passed to inference.fit.
    :param alpha: Passed to inference.fit.
    :param single_theta: bool. If True, the 'train_singletheta' sample files are loaded instead of the 'train' ones
                         and '_singletheta' is appended to the output filename.
    :param training_sample_size: None or int. If smaller than the number of loaded samples, the data is shuffled and
                                 truncated to this many samples.
    :param n_epochs: Number of epochs passed to inference.fit (before compensation, see compensate_sample_size).
    :param compensate_sample_size: bool. If True and the sample is truncated, n_epochs is scaled by
                                   (number of loaded samples) / training_sample_size.
    :param batch_size: Passed to inference.fit.
    :param initial_lr: Passed to inference.fit as initial_learning_rate.
    :param final_lr: Passed to inference.fit as final_learning_rate.
    :param early_stopping: Passed to inference.fit.
    """

    if single_theta:
        n_bins_theta = 1
    if histogram_observables is None or len(histogram_observables) == 0:
        histogram_observables = 'all'

    logging.info('Starting training')
    logging.info('  Simulator:             %s', simulator_name)
    logging.info('  Inference method:      %s', inference_name)
    logging.info('  Run number:            %s', run)
    logging.info('  MADEs:                 %s', n_mades)
    logging.info('  MADE hidden layers:    %s', n_made_hidden_layers)
    logging.info('  MADE units / layer:    %s', n_made_units_per_layer)
    logging.info('  Batch norm:            %s', batch_norm)
    logging.info('  Activation function:   %s', activation)
    logging.info('  Histogram theta bins:  %s', n_bins_theta)
    logging.info('  Histogram observables: %s', histogram_observables)
    # Each label is paired with the setting it names (these two values used to be logged under each other's label)
    logging.info('  Histogram x bins:      %s', n_bins_x)
    logging.info('  1d x histograms:       %s', separate_1d_x_histos)
    logging.info('  Fill empty bins:       %s', fill_empty_bins)
    logging.info('  SCANDAL alpha:         %s', alpha)
    logging.info('  Single-theta sample:   %s', single_theta)
    logging.info(
        '  Training sample size:  %s',
        'maximal' if training_sample_size is None else training_sample_size)
    if compensate_sample_size and training_sample_size is not None:
        logging.info(
            '  Epochs:                %s (plus compensation for decreased sample size)',
            n_epochs)
    else:
        logging.info('  Epochs:                %s', n_epochs)
    logging.info('  Batch size:            %s', batch_size)
    logging.info('  Learning rate:         %s initially, decaying to %s',
                 initial_lr, final_lr)
    logging.info('  Early stopping:        %s', early_stopping)

    # Check paths
    create_missing_folders(base_dir, simulator_name, inference_name)

    # Folders and filenames
    sample_folder = base_dir + '/goldmine/data/samples/' + simulator_name
    model_folder = base_dir + '/goldmine/data/models/' + simulator_name + '/' + inference_name
    result_folder = base_dir + '/goldmine/data/results/' + simulator_name + '/' + inference_name

    sample_filename = 'train'
    output_filename = ''
    if single_theta:
        output_filename += '_singletheta'
        sample_filename += '_singletheta'
    if training_sample_size is not None:
        output_filename += '_trainingsamplesize_' + str(training_sample_size)

    # Run 0 (or no run number) is the default run and carries no suffix
    if run is not None and int(run) != 0:
        output_filename += '_run' + str(int(run))

    # Load training data and creating model
    logging.info('Loading %s training data from %s', simulator_name,
                 sample_folder + '/*_' + sample_filename + '.npy')
    thetas = load_and_check(sample_folder + '/theta0_' + sample_filename +
                            '.npy')
    xs = load_and_check(sample_folder + '/x_' + sample_filename + '.npy')

    n_samples = thetas.shape[0]
    n_parameters = thetas.shape[1]
    n_observables = xs.shape[1]

    logging.info('Found %s samples with %s parameters and %s observables',
                 n_samples, n_parameters, n_observables)

    inference = create_inference(inference_name,
                                 n_mades=n_mades,
                                 n_made_hidden_layers=n_made_hidden_layers,
                                 n_made_units_per_layer=n_made_units_per_layer,
                                 batch_norm=batch_norm,
                                 activation=activation,
                                 n_parameters=n_parameters,
                                 n_observables=n_observables,
                                 n_bins_theta=n_bins_theta,
                                 n_bins_x=n_bins_x,
                                 separate_1d_x_histos=separate_1d_x_histos,
                                 observables=histogram_observables)

    # Load the additional targets the inference method asks for. They have to come from the same sample as thetas
    # and xs, so the file names follow sample_filename (which is 'train' unless single_theta is set).
    if inference.requires_class_label():
        ys = load_and_check(sample_folder + '/y_' + sample_filename + '.npy')
    else:
        ys = None

    if inference.requires_joint_ratio():
        r_xz = load_and_check(sample_folder + '/r_xz_' + sample_filename +
                              '.npy')
    else:
        r_xz = None

    if inference.requires_joint_score():
        t_xz = load_and_check(sample_folder + '/t_xz_' + sample_filename +
                              '.npy')
    else:
        t_xz = None

    # Restricted training sample size
    if training_sample_size is not None and training_sample_size < n_samples:
        thetas, xs, ys, r_xz, t_xz = shuffle(thetas, xs, ys, r_xz, t_xz)

        def _head(array):
            # Keeps the first training_sample_size rows; arrays that were not loaded stay None
            return None if array is None else array[:training_sample_size]

        thetas = _head(thetas)
        xs = _head(xs)
        ys = _head(ys)
        r_xz = _head(r_xz)
        t_xz = _head(t_xz)

        logging.info('Only using %s of %s training samples',
                     training_sample_size, n_samples)

        if compensate_sample_size:
            # Scale the number of epochs by the factor by which the sample shrank
            n_epochs_compensated = int(
                round(n_epochs * n_samples / training_sample_size, 0))
            logging.info(
                'Compensating by increasing number of epochs from %s to %s',
                n_epochs, n_epochs_compensated)
            n_epochs = n_epochs_compensated

    # Train model
    logging.info('Training model %s on %s data', inference_name,
                 simulator_name)
    inference.fit(thetas,
                  xs,
                  ys,
                  r_xz,
                  t_xz,
                  n_epochs=n_epochs,
                  batch_size=batch_size,
                  initial_learning_rate=initial_lr,
                  final_learning_rate=final_lr,
                  alpha=alpha,
                  learning_curve_folder=result_folder,
                  learning_curve_filename=output_filename,
                  early_stopping=early_stopping,
                  fill_empty_bins=fill_empty_bins)

    # Save models
    logging.info('Saving learned model to %s',
                 model_folder + '/model' + output_filename + '.*')
    inference.save(model_folder + '/model' + output_filename)