def run_distributions():
    """Compare the strategies across three synthetic relevance distributions.

    Three relevance profiles over 50 features are defined (an exponential
    fall-off, a two-step profile and a linear ramp).  They are first plotted
    to ``distribution_definitions.pdf`` one level above
    ``timestamp_directory``.  Then, for every profile, 20 datasets are
    generated and passed to ``run_batch`` together with a Gaussian strategy
    and an exploitation strategy.  The per-strategy results are finally
    handed to ``plot_summary``.  Nothing is returned.
    """
    num_features = 50
    grid = np.linspace(0, 1, num_features)
    distributions = [
        [1 - math.exp(8 * (x - 1)) for x in grid],
        [.6] * 10 + [.585] * 10 + [0] * (num_features - 20),
        [.1 * (1 - x) for x in grid],
    ]

    # Plot the definitions of the relevance distributions themselves.
    plt.ylabel('mutual information to target in bits')
    plt.xlabel('feature')
    for distribution in distributions:
        plt.plot(distribution)
    plt.legend(labels=distribution_names)
    plt.savefig(str(timestamp_directory / ".." / "distribution_definitions.pdf"))
    # Release the figure: pyplot keeps it as the "current" figure otherwise,
    # so these curves and axis labels could leak into any later pyplot-based
    # plotting, and the figure would stay in memory.
    plt.close()

    results = []
    strategies = [gaussian_strategy(), exploitation_strategy(0)]
    for distribution in distributions:
        data_for_runs = [
            generate_data(relevance_distribution=distribution)
            for _ in range(20)
        ]
        results.append(run_batch(
            data_for_runs,
            num_features_to_select=10,
            iterations=20 + 2 * num_features,
            true_relevances=distribution,
            strategies=strategies,
        ))

    # ``results`` is indexed by distribution first; swapping the first two
    # axes moves the distribution index to the second axis for each strategy.
    to_plot = {
        strategy: np.swapaxes([result[strategy] for result in results], 0, 1)
        for strategy in strategies
    }
    # NOTE(review): xlabel and name are identical to those used by
    # run_dimensionality, although the x axis here enumerates distributions
    # rather than feature counts -- looks like a copy-paste; confirm whether
    # a distinct label/output name is wanted before changing it.
    plot_summary(to_plot, xlabel='feature count', name='feature_counts',
                 print_stats=True)
def run_dimensionality():
    """Compare the strategies while the total number of features grows.

    For each feature count in ``[20, 50, 100, 150, 200]`` a two-step
    relevance profile is built (10 features at 0.6, 10 at 0.585, the rest
    at 0), 20 datasets are generated from it and passed to ``run_batch``
    with a Gaussian strategy and an exploitation strategy.  The results,
    regrouped per strategy, are given to ``plot_summary`` with the feature
    counts as x values.  Nothing is returned.
    """
    all_num_features = [20, 50, 100, 150, 200]
    strategies = [gaussian_strategy(), exploitation_strategy(0)]

    results = []
    for num_features in all_num_features:
        # Only the number of zero-relevance features varies between settings.
        padding = [0] * (num_features - 20)
        relevances = [.6] * 10 + [.585] * 10 + padding

        datasets = []
        for _ in range(20):
            datasets.append(generate_data(relevance_distribution=relevances))

        batch_result = run_batch(
            datasets,
            num_features_to_select=10,
            iterations=200 + 2 * num_features,
            true_relevances=relevances,
            strategies=strategies,
        )
        results.append(batch_result)

    # ``results`` is indexed by feature-count setting first; swapping the
    # first two axes moves that index to the second axis for each strategy.
    to_plot = {}
    for strategy in strategies:
        per_setting = [result[strategy] for result in results]
        to_plot[strategy] = np.swapaxes(per_setting, 0, 1)

    plot_summary(
        to_plot,
        xlabel='feature count',
        name='feature_counts',
        xvalues=all_num_features,
    )
choices=['Syn1', 'Syn2', 'Syn3', 'Syn4', 'Syn5', 'Syn6'], default='Syn1') parser.add_argument("--epochs", help="sets number of epochs to train for", type=int, default=DEFAULT_EPOCHS) args = parser.parse_args() if args.coeff is None: if args.method == 'INVASE': args.coeff = DEFAULT_INVASE_COEFF else: args.coeff = DEFAULT_NON_INVASE_COEFF # Generate train/test data x_train, y_train, sel_truth_train = generate_data(args.data, N_TRAIN) x_test, y_test, sel_truth_test = generate_data(args.data, N_TEST) # Plot test data # plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test[:, 0]) # plt.show() # Create train and test datasets train_ds = tf.data.Dataset.from_tensor_slices( (x_train, y_train)).shuffle(10000).batch(32) test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32) # Predictor (& baseline) model class MyModel(Model): def __init__(self):
if load_data == False: if load_target == True: with open(target_file_name, 'rb') as f: target = pickle.load(f) else: target = synthetic_data.generate_random_LinRNN( target_num_states, target_input_dim, target_output_dim, alpha_variance=0.2, A_variance=0.2, Omega_variance=0.2) with open(target_file_name, 'wb') as f: pickle.dump(target, f) data_function = lambda l, n: synthetic_data.generate_data( target, N_samples=n, seq_length=l, noise_variance=noise_level) Xtest, ytest = data_function(n=1000, l=test_length) with open(xp_path + 'all_data.pickle', 'wb') as f: pickle.dump([Xtest, ytest], f) else: with open( './Data/RandomRNN/noise_' + str(noise_level) + '_units_' + str(target_num_states) + '/Test.pickle', 'rb') as f: [Xtest, ytest] = pickle.load(f) with open(target_file_name, 'rb') as f: target = pickle.load(f) data_function = lambda l, n: synthetic_data.generate_data( target, n, l, noise_variance=noise_level) elif exp == 'Wind': data, train_test_split = synthetic_data.generate_wind_speed( './Data/Wind_Speed/train.csv', './Data/Wind_Speed/test.csv')
if load_data == False: if load_target == True: with open(target_file_name, 'rb') as f: target = pickle.load(f) else: target = synthetic_data.generate_random_LinRNN(target_num_states, target_input_dim, target_output_dim, alpha_variance=0.2, A_variance=0.2, Omega_variance=0.2) with open(target_file_name, 'wb') as f: pickle.dump(target, f) data_function = lambda l: synthetic_data.generate_data( target, 1000, l, noise_variance=noise_level) Xtest, ytest = data_function(test_length) with open(xp_path + 'all_data.pickle', 'wb') as f: pickle.dump([Xtest, ytest], f) elif load_data == True: with open( './Data/RandomRNN/noise_' + str(noise_level) + '_units_' + str(target_num_states) + '/Test.pickle', 'rb') as f: [Xtest, ytest] = pickle.load(f) with open(target_file_name, 'rb') as f: target = pickle.load(f) for run in range(N_runs): print("test MSE of zero function", np.mean(ytest**2))
def synthetic():
    """Run the exploitation strategy on a single synthetic relevance profile.

    The profile covers 35 features: 5 with relevance 0.6, 10 with 0.585 and
    20 with 0.  Ten datasets are generated from it and handed to
    ``run_batch``, which is asked to select 5 features over 1000 iterations.
    Nothing is returned.
    """
    strong = [.6] * 5
    near_strong = [.585] * 10
    irrelevant = [0] * 20
    relevances = strong + near_strong + irrelevant

    datasets = []
    for _ in range(10):
        datasets.append(generate_data(relevance_distribution=relevances))

    run_batch(
        datasets,
        num_features_to_select=5,
        iterations=1000,
        true_relevances=relevances,
        strategies=[exploitation_strategy()],
    )