Example #1
def run_distributions():
    num_features = 50

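    # Three synthetic relevance profiles: near 1 with an exponential drop-off
    # toward the last features, a two-level step (10 strong, 10 slightly weaker,
    # rest irrelevant), and a shallow linear decay from 0.1 to 0.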
    distributions = [[1 - math.exp(8*(x - 1)) for x in np.linspace(0, 1, num_features)],
                     [.6] * 10 + [.585] * 10 + [0] * (num_features - 20),
                     [.1 * (1 - x) for x in np.linspace(0, 1, num_features)]]

    plt.ylabel('mutual information to target in bits')
    plt.xlabel('feature')

    for distribution in distributions:
        plt.plot(distribution)

    plt.legend(labels=distribution_names)

    plt.savefig(timestamp_directory.parent / "distribution_definitions.pdf")

    results = []

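    # Benchmark the Gaussian strategy against a pure-exploitation baseline.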
    strategies = [gaussian_strategy(), exploitation_strategy(0)]

    for distribution in distributions:
        data_for_runs = [generate_data(relevance_distribution=distribution) for _ in range(20)]

        results.append(run_batch(data_for_runs, num_features_to_select=10, iterations=20 + 2 * num_features,
                                 true_relevances=distribution,
                                 strategies=strategies))

    to_plot = {strategy: np.swapaxes([result[strategy] for result in results], 0, 1)
               for strategy in strategies}

    plot_summary(to_plot, xlabel='distribution', name='distributions', print_stats=True)
Example #2
def run_dimensionality():
    all_num_features = [20, 50, 100, 150, 200]

    results = []

    strategies = [gaussian_strategy(), exploitation_strategy(0)]

    for num_features in all_num_features:
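        # Step profile: 10 strongly relevant features, 10 slightly weaker, rest irrelevant.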
        distribution = [.6] * 10 + [.585] * 10 + [0] * (num_features - 20)

        data_for_runs = [generate_data(relevance_distribution=distribution) for _ in range(20)]

        results.append(run_batch(data_for_runs, num_features_to_select=10, iterations=200 + 2 * num_features,
                                 true_relevances=distribution,
                                 strategies=strategies))

    to_plot = {strategy: np.swapaxes([result[strategy] for result in results], 0, 1)
               for strategy in strategies}

    plot_summary(to_plot, xlabel='feature count', name='feature_counts', xvalues=all_num_features)
Example #3
parser.add_argument("--data",  # flag name recovered from the args.data usage below
                    choices=['Syn1', 'Syn2', 'Syn3', 'Syn4', 'Syn5', 'Syn6'],
                    default='Syn1')
parser.add_argument("--epochs",
                    help="sets number of epochs to train for",
                    type=int,
                    default=DEFAULT_EPOCHS)
args = parser.parse_args()

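# Fall back to a method-specific default coefficient when none was given.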
if args.coeff is None:
    if args.method == 'INVASE':
        args.coeff = DEFAULT_INVASE_COEFF
    else:
        args.coeff = DEFAULT_NON_INVASE_COEFF

# Generate train/test data
x_train, y_train, sel_truth_train = generate_data(args.data, N_TRAIN)
x_test, y_test, sel_truth_test = generate_data(args.data, N_TEST)

# Plot test data
# plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test[:, 0])
# plt.show()

# Create train and test datasets
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


# Predictor (& baseline) model
class MyModel(Model):
    def __init__(self):
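        super(MyModel, self).__init__()
        # The original layer definitions are truncated in this snippet; the
        # two-layer dense head below is an illustrative placeholder, not the
        # original predictor architecture.
        self.d1 = tf.keras.layers.Dense(32, activation='relu')
        self.out = tf.keras.layers.Dense(2, activation='softmax')

    def call(self, x):
        return self.out(self.d1(x))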
Example #4
        if not load_data:
            # Reuse a cached target RNN if available, otherwise sample a new one.
            if load_target:
                with open(target_file_name, 'rb') as f:
                    target = pickle.load(f)
            else:
                target = synthetic_data.generate_random_LinRNN(
                    target_num_states,
                    target_input_dim,
                    target_output_dim,
                    alpha_variance=0.2,
                    A_variance=0.2,
                    Omega_variance=0.2)
                with open(target_file_name, 'wb') as f:
                    pickle.dump(target, f)

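            # data_function draws n noisy sequences of length l from the target RNN.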
            def data_function(l, n):
                return synthetic_data.generate_data(
                    target, N_samples=n, seq_length=l, noise_variance=noise_level)
            Xtest, ytest = data_function(n=1000, l=test_length)
            with open(xp_path + 'all_data.pickle', 'wb') as f:
                pickle.dump([Xtest, ytest], f)
        else:
            with open(
                    './Data/RandomRNN/noise_' + str(noise_level) + '_units_' +
                    str(target_num_states) + '/Test.pickle', 'rb') as f:
                [Xtest, ytest] = pickle.load(f)
            with open(target_file_name, 'rb') as f:
                target = pickle.load(f)
            def data_function(l, n):
                return synthetic_data.generate_data(
                    target, n, l, noise_variance=noise_level)
    elif exp == 'Wind':
        data, train_test_split = synthetic_data.generate_wind_speed(
            './Data/Wind_Speed/train.csv', './Data/Wind_Speed/test.csv')
Example #5
    if not load_data:
        if load_target:
            with open(target_file_name, 'rb') as f:
                target = pickle.load(f)
        else:
            target = synthetic_data.generate_random_LinRNN(target_num_states,
                                                           target_input_dim,
                                                           target_output_dim,
                                                           alpha_variance=0.2,
                                                           A_variance=0.2,
                                                           Omega_variance=0.2)
            with open(target_file_name, 'wb') as f:
                pickle.dump(target, f)

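        # data_function draws 1000 noisy sequences of length l from the target RNN.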
        def data_function(l):
            return synthetic_data.generate_data(
                target, 1000, l, noise_variance=noise_level)
        Xtest, ytest = data_function(test_length)
        with open(xp_path + 'all_data.pickle', 'wb') as f:
            pickle.dump([Xtest, ytest], f)

    else:
        with open(
                './Data/RandomRNN/noise_' + str(noise_level) + '_units_' +
                str(target_num_states) + '/Test.pickle', 'rb') as f:
            [Xtest, ytest] = pickle.load(f)
        with open(target_file_name, 'rb') as f:
            target = pickle.load(f)

    for run in range(N_runs):

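        # MSE of the constant-zero predictor, as a scale reference for the runs below.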
        print("test MSE of zero function", np.mean(ytest**2))
Example #6
def synthetic():
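    # Minimal synthetic run: 35-feature step profile, pure-exploitation strategy only.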
    distribution = [.6] * 5 + [.585] * 10 + [0] * 20
    data_for_runs = [generate_data(relevance_distribution=distribution) for _ in range(10)]
    run_batch(data_for_runs, num_features_to_select=5, iterations=1000, true_relevances=distribution,
              strategies=[exploitation_strategy()])