def nonbatch(task, method, N, M):
    """Run the one-query-at-a-time preference-learning loop and print estimates.

    The first query is a pair of inputs drawn uniformly at random. Each of
    the remaining ``N - 1`` queries is produced by ``run_algo`` from the
    current ``w`` samples. The normalised mean of the samples is printed
    before every generated query and once more after the last answer has
    been recorded.

    Args:
        task: Passed to ``create_env`` to build the simulation object.
        method: Passed to ``run_algo`` to select how queries are generated.
        N: Total number of queries, counting the initial random one.
        M: Number of samples requested from the sampler each round.

    Returns:
        None. Results are only printed.
    """
    simulation_object = create_env(task)
    d = simulation_object.num_of_features
    lower_input_bound = [x[0] for x in simulation_object.feed_bounds]
    upper_input_bound = [x[1] for x in simulation_object.feed_bounds]
    w_sampler = Sampler(d)
    psi_set = []
    s_set = []

    # The bounds are Python lists, so ``2 * list`` repeats the list (it does
    # not scale the values). Presumably this lines the bounds up with the
    # ``2 * feed_size`` entries of an input vector -- TODO confirm that
    # len(feed_bounds) == feed_size.
    input_A = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    input_B = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    psi, s = get_feedback(simulation_object, input_A, input_B)
    psi_set.append(psi)
    s_set.append(s)

    for _ in range(1, N):
        # Hand everything collected so far to the sampler before sampling.
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1, 1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples, axis=0)
        print('w-estimate = {}'.format(mean_w_samples / np.linalg.norm(mean_w_samples)))
        input_A, input_B = run_algo(method, simulation_object, w_samples)
        psi, s = get_feedback(simulation_object, input_A, input_B)
        psi_set.append(psi)
        s_set.append(s)

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1, 1)
    w_samples = w_sampler.sample(M)
    # Recompute the mean from the final samples. Without this the last
    # report would repeat the previous iteration's estimate (and would fail
    # with an unbound local when the loop above never ran, i.e. N <= 1).
    mean_w_samples = np.mean(w_samples, axis=0)
    print('w-estimate = {}'.format(mean_w_samples / np.linalg.norm(mean_w_samples)))
def nonbatch(task, method, N, M):
    """Run the one-query-at-a-time loop against a randomly drawn true ``w``.

    A reference vector ``w_true`` is drawn with components uniform in
    [-1, 1) and scaled to unit length. For each of the ``N`` rounds the
    sampler is refreshed with the data collected so far, ``M`` samples are
    drawn, the normalised mean and its alignment with ``w_true`` are
    printed, and a new query from ``run_algo`` is passed to
    ``get_feedback`` together with ``w_true``. A final report is printed
    after the last answer.

    Args:
        task: Passed to ``create_env`` to build the simulation object.
        method: Passed to ``run_algo`` to select how queries are generated.
        N: Number of queries to ask.
        M: Number of samples requested from the sampler each round.

    Returns:
        None. Results are only printed.
    """
    simulation_object = create_env(task)
    d = simulation_object.num_of_features

    w_true = 2*np.random.rand(d)-1
    w_true = w_true / np.linalg.norm(w_true)
    print('If in automated mode: true w = {}'.format(w_true/np.linalg.norm(w_true)))

    w_sampler = Sampler(d)
    psi_set = []
    s_set = []
    for i in range(N):
        # Hand everything collected so far to the sampler before sampling
        # (both containers are empty on the first round).
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1,1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples,axis=0)
        print('Samples so far: ' + str(i))
        print('w estimate = {}'.format(mean_w_samples/np.linalg.norm(mean_w_samples)))
        # w_true has unit length, so this is the cosine between the mean
        # sample and the true vector.
        print('Alignment = {}'.format(mean_w_samples.dot(w_true)/np.linalg.norm(mean_w_samples)))
        input_A, input_B = run_algo(method, simulation_object, w_samples)
        psi, s = get_feedback(simulation_object, input_A, input_B, w_true)
        psi_set.append(psi)
        s_set.append(s)

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1,1)
    w_samples = w_sampler.sample(M)
    # Recompute the mean from the final samples. Without this the closing
    # report would repeat the estimate from before the last answer (and
    # would fail with an unbound local when N == 0).
    mean_w_samples = np.mean(w_samples,axis=0)
    print('Samples so far: ' + str(N))
    print('w estimate = {}'.format(mean_w_samples/np.linalg.norm(mean_w_samples)))
    print('Alignment = {}'.format(mean_w_samples.dot(w_true)/np.linalg.norm(mean_w_samples)))
def nonbatch(task, criterion, query_type, epsilon, M):
    """Ask queries one at a time until the query score falls below ``epsilon``.

    Each round draws ``M`` samples with ``sample_given_delta``, prints the
    normalised mean of the ``w`` samples, and asks ``run_algo`` for a query
    and its score. When the score is strictly greater than ``epsilon`` the
    query is passed to ``get_feedback`` and the answer is fed to the
    sampler. The loop ends once the score is below ``epsilon``, after which
    one more estimate is sampled and printed.

    Args:
        task: Passed to ``create_env`` to build the simulation object.
        criterion: Passed to ``run_algo``; ``'information'`` and
            ``'volume'`` additionally select which score line is printed.
        query_type: Passed to the sampler and to ``get_feedback``.
        epsilon: Score threshold controlling when the loop stops.
        M: Number of samples requested from the sampler each round.

    Returns:
        None. Results are only printed.
    """
    simulation_object = create_env(task)
    num_features = simulation_object.num_of_features
    # Make this None if delta should be learned as well; in that case switch
    # the sampler calls below from sample_given_delta to sample and drop the
    # true_delta argument.
    true_delta = 1
    # Neither bound list is referenced again in this function.
    lower_input_bound = [bounds[0] for bounds in simulation_object.feed_bounds]
    upper_input_bound = [bounds[1] for bounds in simulation_object.feed_bounds]
    w_sampler = Sampler(num_features)

    def _draw_and_report():
        # Sample, print the normalised mean of the w samples, and return both
        # sample sets for the caller.
        w_draws, delta_draws = w_sampler.sample_given_delta(M, query_type, true_delta)
        w_mean = np.mean(w_draws, axis=0)
        print('w-estimate = {}'.format(w_mean / np.linalg.norm(w_mean)))
        return w_draws, delta_draws

    answered = 0
    score = np.inf
    while score >= epsilon:
        w_samples, delta_samples = _draw_and_report()
        input_A, input_B, score = run_algo(criterion, simulation_object, w_samples, delta_samples)
        if criterion == 'information':
            print('Expected info gain = {}'.format(score))
        elif criterion == 'volume':
            print('Expected volume removal (meaningless scale) = {}'.format(score / M))
        # NOTE(review): a score exactly equal to epsilon keeps the loop going
        # but asks no question, so that round only resamples -- confirm this
        # is intended.
        if not score > epsilon:
            continue
        phi_A, phi_B, s = get_feedback(simulation_object, input_A, input_B, query_type)
        w_sampler.feed(phi_A, phi_B, [s])
        answered += 1

    _draw_and_report()
def add_traj(samplemethod, traj_set):
    """Generate one trajectory and append it to ``traj_set`` as a lattice node.

    Relies on ``simulation_object``, ``reward_values`` and ``lattice`` from
    the surrounding scope; they are not parameters of this function.

    Args:
        samplemethod: Passed to ``run_algo`` to select how the trajectory
            is generated.
        traj_set: List-like container; the new ``lattice.Node`` is appended
            to it in place.

    Returns:
        None.
    """
    # run_algo yields a pair; only the first element is used here.
    chosen_inputs, _ = run_algo(
        samplemethod, simulation_object, reward_values.reshape(1, -1))

    simulation_object.feed(chosen_inputs)
    features = simulation_object.get_features()

    # Reward of this sample: sum of the element-wise product of the reward
    # values with the features.
    total_reward = np.sum(reward_values * features)

    node = lattice.Node(chosen_inputs, reward_value=total_reward, features=features)
    traj_set.append(node)
def batch(task, method, N, M, b):
    """Run the batched preference-learning loop and print estimates.

    The first ``b`` queries are pairs of inputs drawn uniformly at random.
    After that, while fewer than ``N`` queries have been asked, the sampler
    is refreshed, ``M`` samples are drawn, the normalised mean is printed
    and ``run_algo`` supplies the next batch of ``b`` queries. A final
    estimate is printed after the last batch.

    Args:
        task: Passed to ``create_env`` to build the simulation object.
        method: Passed to ``run_algo`` to select how batches are generated.
        N: Total number of queries; must be a multiple of ``b``.
        M: Number of samples requested from the sampler each round.
        b: Batch size.

    Returns:
        None. Results are only printed. If ``N`` is not a multiple of ``b``
        a message is printed and ``exit(0)`` is called.
    """
    if N % b != 0:
        print('N must be divisible to b')
        exit(0)
    B = 20 * b

    simulation_object = create_env(task)
    num_features = simulation_object.num_of_features
    lower_input_bound = [bounds[0] for bounds in simulation_object.feed_bounds]
    upper_input_bound = [bounds[1] for bounds in simulation_object.feed_bounds]
    w_sampler = Sampler(num_features)
    psi_set = []
    s_set = []

    def _record(first, second):
        # Ask for feedback on one pair and store the returned values.
        psi, s = get_feedback(simulation_object, first, second)
        psi_set.append(psi)
        s_set.append(s)

    def _sample_and_report():
        # Refresh the sampler with all data so far, draw M samples, print the
        # normalised mean and return the samples.
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1, 1)
        draws = w_sampler.sample(M)
        w_mean = np.mean(draws, axis=0)
        print('w-estimate = {}'.format(w_mean / np.linalg.norm(w_mean)))
        return draws

    # The bounds are Python lists, so ``2 * list`` repeats the list rather
    # than scaling its values.
    batch_shape = (b, 2 * simulation_object.feed_size)
    inputA_set = np.random.uniform(low=2 * lower_input_bound,
                                   high=2 * upper_input_bound,
                                   size=batch_shape)
    inputB_set = np.random.uniform(low=2 * lower_input_bound,
                                   high=2 * upper_input_bound,
                                   size=batch_shape)
    for j in range(b):
        input_A = inputA_set[j]
        input_B = inputB_set[j]
        _record(input_A, input_B)

    asked = b
    while asked < N:
        w_samples = _sample_and_report()
        print('Samples so far: ' + str(asked))
        inputA_set, inputB_set = run_algo(method, simulation_object, w_samples, b, B)
        for j in range(b):
            input_A = inputA_set[j]
            input_B = inputB_set[j]
            # NOTE(review): generated pairs go to get_feedback in (B, A)
            # order, unlike the random initial batch above -- confirm this is
            # intentional.
            _record(input_B, input_A)
        asked += b

    _sample_and_report()
def nonbatch(task, method, N, M, checkpoints=None):
    """Run the one-query-at-a-time loop and return checkpointed estimates.

    The first query is a pair of inputs drawn uniformly at random. Each of
    the remaining ``N - 1`` queries is produced by ``run_algo`` from the
    current ``w`` samples. All answers come from ``get_feedback_auto``. The
    normalised mean of the samples is printed every round and saved
    whenever the round index appears in ``checkpoints``.

    Args:
        task: Passed to ``create_env`` to build the simulation object.
        method: Passed to ``run_algo`` to select how queries are generated.
        N: Total number of queries, counting the initial random one.
        M: Number of samples requested from the sampler each round.
        checkpoints: Optional collection of round indices (compared
            against 1 .. N-1) at which to save the estimate. ``None``
            means no intermediate checkpoints.

    Returns:
        list: The normalised mean vectors saved at the requested rounds, in
        round order, followed by the final estimate (always appended).
    """
    if checkpoints is None:
        checkpoints = []
    checkpointed_weights = []

    simulation_object = create_env(task)
    d = simulation_object.num_of_features
    lower_input_bound = [x[0] for x in simulation_object.feed_bounds]
    upper_input_bound = [x[1] for x in simulation_object.feed_bounds]
    w_sampler = Sampler(d)
    psi_set = []
    s_set = []

    # The bounds are Python lists, so ``2 * list`` repeats the list (it does
    # not scale the values). Presumably this lines the bounds up with the
    # ``2 * feed_size`` entries of an input vector -- TODO confirm that
    # len(feed_bounds) == feed_size.
    input_A = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    input_B = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    # psi is the difference, s is the 1 or -1 signal
    psi, s = get_feedback_auto(simulation_object, input_A, input_B)
    psi_set.append(psi)
    s_set.append(s)

    for i in range(1, N):
        # Hand everything collected so far to the sampler before sampling.
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1, 1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples, axis=0)
        print('w-estimate = {}'.format(mean_w_samples / np.linalg.norm(mean_w_samples)))
        if i in checkpoints:
            checkpointed_weights.append(mean_w_samples / np.linalg.norm(mean_w_samples))
            print("Weights saved at iteration {}".format(i))
        input_A, input_B = run_algo(method, simulation_object, w_samples)
        psi, s = get_feedback_auto(simulation_object, input_A, input_B)
        psi_set.append(psi)
        s_set.append(s)

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1, 1)
    w_samples = w_sampler.sample(M)
    # Recompute the mean from the final samples. Without this the returned
    # final weights would be the estimate from before the last answer (and
    # the function would fail with an unbound local when N <= 1).
    mean_w_samples = np.mean(w_samples, axis=0)
    checkpointed_weights.append(mean_w_samples / np.linalg.norm(mean_w_samples))
    print('w-estimate = {}'.format(mean_w_samples / np.linalg.norm(mean_w_samples)))
    return checkpointed_weights
def batch(task, method, N, M, b):
    """Run the batched loop against a randomly drawn true ``w``.

    A reference vector ``w_true`` is drawn with components uniform in
    [-1, 1) and scaled to unit length. While fewer than ``N`` queries have
    been asked, the sampler is refreshed, ``M`` samples are drawn, a
    progress report is printed and ``run_algo`` supplies a batch of ``b``
    queries that are passed to ``get_feedback`` together with ``w_true``.
    A final report is printed after the last batch.

    Args:
        task: Passed to ``create_env`` to build the simulation object.
        method: Passed to ``run_algo`` to select how batches are generated.
        N: Total number of queries; must be a multiple of ``b``.
        M: Number of samples requested from the sampler each round.
        b: Batch size.

    Returns:
        None. Results are only printed. If ``N`` is not a multiple of ``b``
        a message is printed and ``exit(0)`` is called.
    """
    if N % b != 0:
        print('N must be divisible to b')
        exit(0)
    B = 20*b

    simulation_object = create_env(task)
    num_features = simulation_object.num_of_features

    w_true = 2*np.random.rand(num_features)-1
    w_true = w_true / np.linalg.norm(w_true)
    print('If in automated mode: true w = {}'.format(w_true/np.linalg.norm(w_true)))

    # Neither bound list is referenced again in this function.
    lower_input_bound = [bounds[0] for bounds in simulation_object.feed_bounds]
    upper_input_bound = [bounds[1] for bounds in simulation_object.feed_bounds]
    w_sampler = Sampler(num_features)
    psi_set = []
    s_set = []

    def _sample_and_report(count):
        # Refresh the sampler with all data so far, draw M samples, print the
        # progress report and return the samples.
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1,1)
        draws = w_sampler.sample(M)
        w_mean = np.mean(draws, axis=0)
        print('Samples so far: ' + str(count))
        print('w estimate = {}'.format(w_mean/np.linalg.norm(w_mean)))
        # w_true has unit length, so this is the cosine between the mean
        # sample and the true vector.
        print('Alignment = {}'.format(w_mean.dot(w_true)/np.linalg.norm(w_mean)))
        return draws

    asked = 0
    while asked < N:
        w_samples = _sample_and_report(asked)
        inputA_set, inputB_set = run_algo(method, simulation_object, w_samples, b, B)
        for j in range(b):
            input_A = inputA_set[j]
            input_B = inputB_set[j]
            # NOTE(review): the pair is handed to get_feedback in (B, A)
            # order -- confirm this is intentional.
            psi, s = get_feedback(simulation_object, input_B, input_A, w_true)
            psi_set.append(psi)
            s_set.append(s)
        asked += b

    _sample_and_report(N)