def nonbatch(task, method, N, M):
    """Learn w from pairwise feedback, asking one query at a time.

    The first query is a uniformly random input pair; every later query is
    chosen by ``run_algo`` from the current samples of w.  Before each new
    query (and once more after the last answer) the sampler is given all
    feedback collected so far and the normalized sample mean is printed.

    Args:
        task: Identifier passed to ``create_env`` to build the simulation.
        method: Query-selection method forwarded to ``run_algo``.
        N: Number of queries; the random first query is always asked, and
            ``N - 1`` further queries follow.
        M: Number of samples requested from the sampler at each step.
    """
    simulation_object = create_env(task)
    d = simulation_object.num_of_features
    lower_input_bound = [x[0] for x in simulation_object.feed_bounds]
    upper_input_bound = [x[1] for x in simulation_object.feed_bounds]

    w_sampler = Sampler(d)
    psi_set = []
    s_set = []

    # The bounds are Python lists, so ``2 * bounds`` repeats the list rather
    # than scaling it -- presumably to line up with the doubled input length
    # (2 * feed_size); confirm against the environment definition.
    input_A = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    input_B = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    psi, s = get_feedback(simulation_object, input_A, input_B)
    psi_set.append(psi)
    s_set.append(s)

    for _ in range(1, N):
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1, 1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples, axis=0)
        print('w-estimate = {}'.format(mean_w_samples /
                                       np.linalg.norm(mean_w_samples)))
        input_A, input_B = run_algo(method, simulation_object, w_samples)
        psi, s = get_feedback(simulation_object, input_A, input_B)
        psi_set.append(psi)
        s_set.append(s)

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1, 1)
    w_samples = w_sampler.sample(M)
    # Recompute the mean from the final samples; reusing the value left over
    # from the loop would print a stale estimate (and fail when N <= 1).
    mean_w_samples = np.mean(w_samples, axis=0)
    print('w-estimate = {}'.format(mean_w_samples /
                                   np.linalg.norm(mean_w_samples)))
Exemplo n.º 2
0
def nonbatch(task, method, N, M):
    """Learn w from simulated pairwise feedback, one query at a time.

    A random unit-norm ``w_true`` is drawn and passed to ``get_feedback``
    with every query.  Before each query (and once more after the last
    answer) the sampler is given all feedback collected so far, and the
    normalized sample mean and its dot product with ``w_true`` are printed.

    Args:
        task: Identifier passed to ``create_env`` to build the simulation.
        method: Query-selection method forwarded to ``run_algo``.
        N: Number of queries to ask.
        M: Number of samples requested from the sampler at each step.
    """
    simulation_object = create_env(task)
    d = simulation_object.num_of_features

    # Uniform draw from [-1, 1)^d, scaled to unit length.
    w_true = 2*np.random.rand(d)-1
    w_true = w_true / np.linalg.norm(w_true)
    print('If in automated mode: true w = {}'.format(w_true/np.linalg.norm(w_true)))

    w_sampler = Sampler(d)
    psi_set = []
    s_set = []
    for i in range(N):
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1,1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples,axis=0)
        print('Samples so far: ' + str(i))
        print('w estimate = {}'.format(mean_w_samples/np.linalg.norm(mean_w_samples)))
        print('Alignment = {}'.format(mean_w_samples.dot(w_true)/np.linalg.norm(mean_w_samples)))
        input_A, input_B = run_algo(method, simulation_object, w_samples)
        psi, s = get_feedback(simulation_object, input_A, input_B, w_true)
        psi_set.append(psi)
        s_set.append(s)

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1,1)
    w_samples = w_sampler.sample(M)
    # Recompute the mean from the final samples; reusing the value left over
    # from the loop would report a stale estimate (and fail when N == 0).
    mean_w_samples = np.mean(w_samples,axis=0)
    print('Samples so far: ' + str(N))
    print('w estimate = {}'.format(mean_w_samples/np.linalg.norm(mean_w_samples)))
    print('Alignment = {}'.format(mean_w_samples.dot(w_true)/np.linalg.norm(mean_w_samples)))
Exemplo n.º 3
0
def nonbatch(task, criterion, query_type, epsilon, M):
	"""Ask queries one at a time until the query score falls below epsilon.

	Each round draws samples from the sampler, prints the normalized mean
	of the w samples, and asks ``run_algo`` for the next query together with
	its score.  Feedback is requested and fed to the sampler only while the
	score is strictly greater than ``epsilon``; the loop itself ends once the
	score is strictly less than ``epsilon``.  A final estimate is printed
	after the loop.

	Args:
		task: Identifier passed to ``create_env`` to build the simulation.
		criterion: Forwarded to ``run_algo``; the values 'information' and
			'volume' additionally select which score message is printed.
		query_type: Forwarded to the sampler and to ``get_feedback``.
		epsilon: Score threshold used as the stopping rule.
		M: Number of samples requested from the sampler each round.
	"""
	env = create_env(task)
	num_features = env.num_of_features

	# Delta is held fixed here.  To learn it as well, set this to None, use
	# the sampler's `sample` instead of `sample_given_delta`, and drop the
	# true_delta argument from those calls.
	true_delta = 1

	# Not referenced again in this function.
	lower_input_bound = [bounds[0] for bounds in env.feed_bounds]
	upper_input_bound = [bounds[1] for bounds in env.feed_bounds]

	sampler = Sampler(num_features)
	answered = 0  # number of queries for which feedback was collected
	score = np.inf
	while score >= epsilon:
		w_samples, delta_samples = sampler.sample_given_delta(M, query_type, true_delta)
		w_mean = np.mean(w_samples, axis=0)
		w_unit = w_mean / np.linalg.norm(w_mean)
		print('w-estimate = {}'.format(w_unit))
		input_A, input_B, score = run_algo(criterion, env, w_samples, delta_samples)
		if criterion == 'information':
			print('Expected info gain = {}'.format(score))
		elif criterion == 'volume':
			print('Expected volume removal (meaningless scale) = {}'.format(score/M))
		# A score exactly equal to epsilon asks for no feedback, yet the
		# loop condition above still lets another round run.
		if not (score > epsilon):
			continue
		phi_A, phi_B, s = get_feedback(env, input_A, input_B, query_type)
		sampler.feed(phi_A, phi_B, [s])
		answered += 1

	# Final estimate from everything fed to the sampler.
	w_samples, delta_samples = sampler.sample_given_delta(M, query_type, true_delta)
	w_mean = np.mean(w_samples, axis=0)
	w_unit = w_mean / np.linalg.norm(w_mean)
	print('w-estimate = {}'.format(w_unit))
    def add_traj(samplemethod, traj_set):
        """Generate one trajectory with ``run_algo`` and append it to traj_set.

        Relies on ``simulation_object``, ``reward_values`` and ``lattice``
        from the enclosing scope.  Only the first of the two inputs returned
        by ``run_algo`` is used.
        """
        weights_row = reward_values.reshape(1, -1)
        sample_A, _ = run_algo(samplemethod, simulation_object, weights_row)

        # Feed the chosen input to the simulation, then read its features.
        simulation_object.feed(sample_A)
        phi_A = simulation_object.get_features()

        # Reward is the sum of the element-wise product of weights and
        # features.
        reward_A = np.sum(reward_values * phi_A)
        node = lattice.Node(sample_A, reward_value=reward_A, features=phi_A)
        traj_set.append(node)
def batch(task, method, N, M, b):
    """Learn w from pairwise feedback, asking ``b`` queries per round.

    The first round uses ``b`` uniformly random input pairs; every later
    round asks ``run_algo`` for ``b`` pairs based on the current samples of
    w.  Before each later round (and once more at the end) the sampler is
    given all feedback collected so far and the normalized sample mean is
    printed.

    Args:
        task: Identifier passed to ``create_env`` to build the simulation.
        method: Batch query-selection method forwarded to ``run_algo``.
        N: Total number of queries; must be a multiple of ``b``.
        M: Number of samples requested from the sampler at each step.
        b: Number of queries per round.

    Raises:
        SystemExit: With status 1 when ``N`` is not a multiple of ``b``.
    """
    if N % b != 0:
        print('N must be divisible to b')
        # Abort with a failure status.  The site-provided ``exit`` helper is
        # meant for interactive use, and status 0 would report success.
        raise SystemExit(1)
    # Forwarded to run_algo together with b; its meaning is defined there.
    B = 20 * b

    simulation_object = create_env(task)
    d = simulation_object.num_of_features
    lower_input_bound = [x[0] for x in simulation_object.feed_bounds]
    upper_input_bound = [x[1] for x in simulation_object.feed_bounds]

    w_sampler = Sampler(d)
    psi_set = []
    s_set = []

    # The bounds are Python lists, so ``2 * bounds`` repeats the list rather
    # than scaling it -- presumably to line up with the doubled input length
    # (2 * feed_size); confirm against the environment definition.
    inputA_set = np.random.uniform(low=2 * lower_input_bound,
                                   high=2 * upper_input_bound,
                                   size=(b, 2 * simulation_object.feed_size))
    inputB_set = np.random.uniform(low=2 * lower_input_bound,
                                   high=2 * upper_input_bound,
                                   size=(b, 2 * simulation_object.feed_size))
    for j in range(b):
        input_A = inputA_set[j]
        input_B = inputB_set[j]
        psi, s = get_feedback(simulation_object, input_A, input_B)
        psi_set.append(psi)
        s_set.append(s)

    i = b
    while i < N:
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1, 1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples, axis=0)
        print('w-estimate = {}'.format(mean_w_samples /
                                       np.linalg.norm(mean_w_samples)))
        print('Samples so far: ' + str(i))
        inputA_set, inputB_set = run_algo(method, simulation_object, w_samples,
                                          b, B)
        for j in range(b):
            input_A = inputA_set[j]
            input_B = inputB_set[j]
            # NOTE(review): the pair is passed as (input_B, input_A) here but
            # as (input_A, input_B) for the initial random round; kept as-is,
            # confirm whether the swap is intentional.
            psi, s = get_feedback(simulation_object, input_B, input_A)
            psi_set.append(psi)
            s_set.append(s)
        i += b

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1, 1)
    w_samples = w_sampler.sample(M)
    mean_w_samples = np.mean(w_samples, axis=0)
    print('w-estimate = {}'.format(mean_w_samples /
                                   np.linalg.norm(mean_w_samples)))
def nonbatch(task, method, N, M, checkpoints=None):
    """Learn w one query at a time and return estimates at checkpoints.

    The first query is a uniformly random input pair; every later query is
    chosen by ``run_algo`` from the current samples of w.  Feedback comes
    from ``get_feedback_auto``.

    Args:
        task: Identifier passed to ``create_env`` to build the simulation.
        method: Query-selection method forwarded to ``run_algo``.
        N: Number of queries; the random first query is always asked, and
            ``N - 1`` further queries follow.
        M: Number of samples requested from the sampler at each step.
        checkpoints: Iteration indices (values of the loop counter, which
            runs from 1 to N - 1) at which the current normalized estimate
            is saved.  ``None`` means no intermediate checkpoints.

    Returns:
        A list of normalized mean-w estimates: one per checkpoint hit, in
        iteration order, followed by the final estimate.
    """
    if checkpoints is None:
        checkpoints = []
    checkpointed_weights = []
    simulation_object = create_env(task)
    d = simulation_object.num_of_features
    lower_input_bound = [x[0] for x in simulation_object.feed_bounds]
    upper_input_bound = [x[1] for x in simulation_object.feed_bounds]

    w_sampler = Sampler(d)
    psi_set = []
    s_set = []

    # The bounds are Python lists, so ``2 * bounds`` repeats the list rather
    # than scaling it -- presumably to line up with the doubled input length
    # (2 * feed_size); confirm against the environment definition.
    input_A = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    input_B = np.random.uniform(low=2 * lower_input_bound,
                                high=2 * upper_input_bound,
                                size=(2 * simulation_object.feed_size))
    psi, s = get_feedback_auto(
        simulation_object, input_A,
        input_B)  # psi is the difference, s is the 1 or -1 signal
    psi_set.append(psi)
    s_set.append(s)

    for i in range(1, N):
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1, 1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples, axis=0)
        print('w-estimate = {}'.format(mean_w_samples /
                                       np.linalg.norm(mean_w_samples)))
        if i in checkpoints:
            checkpointed_weights.append(mean_w_samples /
                                        np.linalg.norm(mean_w_samples))
            print("Weights saved at iteration {}".format(i))
        input_A, input_B = run_algo(method, simulation_object, w_samples)
        psi, s = get_feedback_auto(simulation_object, input_A, input_B)
        psi_set.append(psi)
        s_set.append(s)

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1, 1)
    w_samples = w_sampler.sample(M)
    # Recompute the mean from the final samples; reusing the value left over
    # from the loop would return a stale estimate that ignores the last
    # answer (and fail when N <= 1).
    mean_w_samples = np.mean(w_samples, axis=0)
    checkpointed_weights.append(mean_w_samples /
                                np.linalg.norm(mean_w_samples))
    print('w-estimate = {}'.format(mean_w_samples /
                                   np.linalg.norm(mean_w_samples)))
    return checkpointed_weights
Exemplo n.º 7
0
def batch(task, method, N, M, b):
    """Learn w from simulated pairwise feedback, ``b`` queries per round.

    A random unit-norm ``w_true`` is drawn and passed to ``get_feedback``
    with every query.  Before each round (and once more at the end) the
    sampler is given all feedback collected so far, and the normalized
    sample mean and its dot product with ``w_true`` are printed.

    Args:
        task: Identifier passed to ``create_env`` to build the simulation.
        method: Batch query-selection method forwarded to ``run_algo``.
        N: Total number of queries; must be a multiple of ``b``.
        M: Number of samples requested from the sampler at each step.
        b: Number of queries per round.

    Raises:
        SystemExit: With status 1 when ``N`` is not a multiple of ``b``.
    """
    if N % b != 0:
        print('N must be divisible to b')
        # Abort with a failure status.  The site-provided ``exit`` helper is
        # meant for interactive use, and status 0 would report success.
        raise SystemExit(1)
    # Forwarded to run_algo together with b; its meaning is defined there.
    B = 20*b

    simulation_object = create_env(task)
    d = simulation_object.num_of_features

    # Uniform draw from [-1, 1)^d, scaled to unit length.
    w_true = 2*np.random.rand(d)-1
    w_true = w_true / np.linalg.norm(w_true)
    print('If in automated mode: true w = {}'.format(w_true/np.linalg.norm(w_true)))

    w_sampler = Sampler(d)
    psi_set = []
    s_set = []
    i = 0
    while i < N:
        w_sampler.A = psi_set
        w_sampler.y = np.array(s_set).reshape(-1,1)
        w_samples = w_sampler.sample(M)
        mean_w_samples = np.mean(w_samples,axis=0)
        print('Samples so far: ' + str(i))
        print('w estimate = {}'.format(mean_w_samples/np.linalg.norm(mean_w_samples)))
        print('Alignment = {}'.format(mean_w_samples.dot(w_true)/np.linalg.norm(mean_w_samples)))
        inputA_set, inputB_set = run_algo(method, simulation_object, w_samples, b, B)
        for j in range(b):
            input_A = inputA_set[j]
            input_B = inputB_set[j]
            # NOTE(review): the pair is passed as (input_B, input_A), i.e. in
            # the opposite order to how run_algo returned it; kept as-is,
            # confirm whether the swap is intentional.
            psi, s = get_feedback(simulation_object, input_B, input_A, w_true)
            psi_set.append(psi)
            s_set.append(s)
        i += b

    w_sampler.A = psi_set
    w_sampler.y = np.array(s_set).reshape(-1,1)
    w_samples = w_sampler.sample(M)
    mean_w_samples = np.mean(w_samples, axis=0)
    print('Samples so far: ' + str(N))
    print('w estimate = {}'.format(mean_w_samples/np.linalg.norm(mean_w_samples)))
    print('Alignment = {}'.format(mean_w_samples.dot(w_true)/np.linalg.norm(mean_w_samples)))