示例#1
0
 def __init__(self, params):
     """Create the racer environment, both networks and the episode state.

     Args:
         params: mapping providing the keys "render_mode", "sensor_type"
             and "sensor_array_params"; all three are forwarded to
             ``gym.make`` and "render_mode" is also kept on the instance.
     """
     render_mode = params["render_mode"]

     # private variables with read access
     self.__agent = gym.make(
         "racer-v0",
         render_mode=render_mode,
         sensor_array_type=params["sensor_type"],
         sensor_array_params=params["sensor_array_params"],
     )
     self.__obs = self.__agent.reset()
     self.__done = False
     self.__reward = None

     # NOTE(review): the size is read through ``self.agent`` rather than
     # ``self.__agent`` -- presumably a read-only property defined elsewhere
     # on the class; confirm it exists.
     observation_size = self.agent.observation_space.shape[0]
     self.__network = network.neural_network(observation_size, 8)
     self.__offline_network = network.neural_network(
         observation_size, 8, requires_grad=False)
     self.__memory = None

     # public variables
     self.render_mode = render_mode
示例#2
0
# Scatter dataset column 0 against column 3 and save the figure.
plt.figure(2)
plt.scatter(dataset[:, 0].reshape((dataset.shape[0], 1)),
            dataset[:, 3].reshape((dataset.shape[0], 1)))
plt.savefig('./Plots/g.png')

# Scatter dataset column 0 against column 4 and save the figure.
plt.figure(5)
plt.scatter(dataset[:, 0].reshape((dataset.shape[0], 1)),
            dataset[:, 4].reshape((dataset.shape[0], 1)))
plt.savefig('./Plots/d.png')

print(dataset.shape)

# Each model lives in its own tf.Graph so their variables and ops stay apart.
g = tf.Graph()

with g.as_default():
    model_G = neural_network(1, 1, [10], name='Model_G_')
    # tf.initialize_all_variables() is deprecated; this is its drop-in
    # replacement.
    # NOTE(review): the init op only covers variables that exist at this
    # point -- confirm value()/dx()/d2x() below do not create any more.
    init_g = tf.global_variables_initializer()
    x_g = tf.placeholder(tf.float64, [None, 1])
    G = model_G.value(x_g)
    d1g = model_G.dx(x_g)
    d2g = model_G.d2x(x_g)

d = tf.Graph()

with d.as_default():
    model_D = neural_network(1, 1, [10, 10], name='Model_D_')
    # Same replacement and same caveat as for init_g above.
    init_d = tf.global_variables_initializer()
    x_d = tf.placeholder(tf.float64, [None, 1])
    D = model_D.value(x_d)
    d1d = model_D.dx(x_d)
    d2d = model_D.d2x(x_d)
示例#3
0
        e_greedy = 0.4
    elif 750 <= episode < 800:
        e_greedy = 0.3
    elif 800 <= episode < 850:
        e_greedy = 0.2
    else:
        e_greedy = 0.1

    # sample net structure, train and get accuracy
    # Draw a candidate (S, U) from the table with the current exploration
    # rate and re-draw until S is not already stored in Memory_S.
    # NOTE(review): if sample_new_network can no longer return an unseen S,
    # this loop never terminates.
    S, U = table.sample_new_network(epsilon=e_greedy)
    while S in Memory_S:
        S, U = table.sample_new_network(epsilon=e_greedy)
    
    # Work on a deep copy so the U stored in Memory_U below is not changed
    # by the extra entry appended here.
    net_structure = deepcopy(U)
    net_structure.append(('T', 4))  # NOTE(review): presumably the final/output layer spec -- confirm
    nn = neural_network(net_structure=net_structure, input_shape=input_shape)
    nn.compile_model()
    # X_test / y_test are passed as the validation data for the 5-epoch fit.
    nn.fit_model(X_train, y_train, batch_size=128, nb_epoch=5, val_X=X_test, val_y=y_test, verbose=1)
    accuracy = nn.evaluate_model()
    print('score :', accuracy)

    # store to replay memory
    # The three lists are appended together, so index i refers to the same
    # sample in each of them.
    Memory_S.append(S)
    Memory_U.append(U)
    Memory_accuracy.append(accuracy)

    # update q-table for k times
    # Each iteration takes one (S, U, accuracy) triple from the replay
    # memory via uniform() and feeds it to the table update; the loop
    # variable itself is not used.
    for memory in range(k_replay_update):
        S_sample, U_sample, accuracy_sample = uniform(Memory_S, Memory_U, Memory_accuracy)
        table.update_q_values(S_sample, U_sample, accuracy_sample)
    
示例#4
0
lr = 0.01  # learning rate handed to the Adam optimizer below
N = 3      # dataset column used as the plotted target and passed to sample_dataset

data_sampler = sample_dataset(dataset, batch_size, N)
n_batches = int(len(dataset)/batch_size)


# Scatter dataset column 0 against column N and save the figure.
plt.figure(1)
plt.scatter(dataset[:, 0].reshape((dataset.shape[0], 1)),
            dataset[:, N].reshape((dataset.shape[0], 1)))
plt.savefig('./Plots/1.png')

# placeholders for training data

x = tf.placeholder(tf.float64, [None, 1])
y = tf.placeholder(tf.float64, [None, 1])

model = neural_network(1, 1, [10], name='Model_G_')

network_out = model.value(x)

# tf.nn.l2_loss already reduces to a scalar (sum(t ** 2) / 2), so the
# surrounding reduce_mean leaves the value unchanged; kept as-is so the
# training objective is not altered.
loss = tf.reduce_mean(tf.nn.l2_loss(network_out - y))

optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

# tf.initialize_all_variables() is deprecated; this is its drop-in
# replacement. It is created after minimize() so the optimizer's own
# variables are covered by the init op as well.
init = tf.global_variables_initializer()

saver = tf.train.Saver(save_relative_paths=True)

with tf.Session() as sess:
    # create initialized variables
    best_loss = sys.maxsize
    sess.run(init)