display = ShowTraining(epochs_num=epochs)

trainer = Trainer(show_training=True, show_function=display.show)

t = time.time()
J_train_list, dJdy_list, J_test_list = trainer.learn(
    model=model,
    train=train,
    test=test,
    loss=NegativeLogLikelihoodLoss(),
    # loss=CrossEntropyLoss(),
    # loss=SquaredLoss(),
    # optimizer=GradientDescent(learning_rate=0.15/110),
    optimizer=GradientDescentMomentum(learning_rate=0.005, momentum=0.8),
    batch_size=100,
    epochs=epochs)
elapsed = time.time() - t
print('Training time: ' + str(elapsed))

# Collect the trained model's outputs on the train and test sets
y1 = []
for i, (x, target) in enumerate(train):
    y1.append(model.forward(x))

y2 = []
for i, (x, target) in enumerate(test):
    y2.append(model.forward(x))
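# The forward passes above collect raw model outputs but never score them.
# A minimal sketch of turning them into a classification accuracy, assuming
# the targets are one-hot vectors and the model emits class scores (both
# assumptions; the snippet above does not confirm the data layout):

def accuracy(outputs, dataset):
    # Count predictions whose argmax matches the argmax of the one-hot target.
    hits = sum(int(np.argmax(y) == np.argmax(target))
               for y, (x, target) in zip(outputs, dataset))
    return float(hits) / len(dataset)

print('Train accuracy: ' + str(accuracy(y1, train)))
print('Test accuracy:  ' + str(accuracy(y2, test)))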
    np.array([-1.0, -1.0]), np.array([1.0, 1.0]))

n = Sequential(
    norm,
    # LinearLayer(2, 5, weights='gaussian'),
    # TanhLayer,
    # AddGaussian(1),
    LinearLayer(2, 4, weights='gaussian'),
    RandomGaussianLayer(1),
    SoftMaxLayer)

agent = GenericAgent(n, 4, 40, 5.0)
agent.set_training_options(
    Trainer(),
    NegativeLogLikelihoodLoss(),
    GradientDescentMomentum(learning_rate=0.1, momentum=0.7)
    # GradientDescent(learning_rate=0.2)
)

start = np.array([3.5, 3.5])
obstacles = [
    # np.array([2.5, 2.5, 1.0])
]
win = np.array([0.5, 0.5, 0.5])

def data_gen(t=0):
    ball = Ball(np.random.rand(1, 2)[0] * 5)
    time_start_game = 0
    for ind, time in enumerate(np.linspace(0, time_end, int(time_end / time_step))):
        state = np.array([ball.p[0], ball.p[1]])
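# GenericAgent(n, 4, 40, 5.0) pairs with the 4-way softmax head above, which
# suggests 4 discrete actions. A minimal sketch of how an action index could
# map onto a 2-D move in the ball-chasing arena; the command table, step size,
# and arena bounds are illustrative assumptions, not the repository's actual
# mapping:

COMMANDS = np.array([[1.0, 0.0],    # right
                     [-1.0, 0.0],   # left
                     [0.0, 1.0],    # up
                     [0.0, -1.0]])  # down

def apply_command(position, ind_command, step=0.1):
    # Step in the chosen direction, clipped to the assumed 5x5 arena.
    return np.clip(position + step * COMMANDS[ind_command], 0.0, 5.0)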
# W3 = utils.SharedWeights(np.array([[10.0, -10.0, 0.0], [-10.0, 10.0, 0.0]]), 2+1, 2)
# W2 = utils.SharedWeights('gaussian', 2+1, 2)

Q_hat = Sequential(
    norm,
    LinearLayer(2, 2, weights=W3),
    ReluLayer,
    LinearLayer(2, 3, weights=W4),
    # TanhLayer
)

# DeepAgent(Q, Q_hat, replay_memory_size, minibatch_size=100, learning_rate=0.1,
#           gamma=0.95, policy='eps-greedy', epsilon=0.3)
agent = DeepAgent(Q, Q_hat, 1000, minibatch_size=100, policy='eps-greedy')
agent.set_training_options(
    Trainer(show_training=True),
    SquaredLoss(),
    # GradientDescent(learning_rate=0.001)
    GradientDescentMomentum(learning_rate=0.1, momentum=0.2, clip=1)
)

J_train_list = []
dJdy_list = []

def data_gen(t=0):
    cart = Cart()
    ball = Ball()
    catches = 0
    for ind, time in enumerate(np.linspace(0, time_end, int(time_end / time_step))):
        # print(time)
        state = np.array([ball.p[0], cart.p[0]])  # , ball.p[1], ball.v[0]])
        ind_command = agent.forward(state)
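# DeepAgent takes both Q and Q_hat: the standard DQN arrangement where a
# frozen target network Q_hat computes the bootstrap target
# r + gamma * max_a Q_hat(s'), so the regression target stays fixed while Q
# is updated. A minimal sketch of that target for one transition, assuming
# Q_hat.forward returns the per-action value vector and gamma = 0.95 as in
# the commented constructor signature above:

def dqn_target(reward, next_state, done, gamma=0.95):
    if done:
        return reward  # terminal transitions carry no bootstrap term
    return reward + gamma * np.max(Q_hat.forward(next_state))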
printer = ShowTraining(epochs_num=epochs, weights_list={
    'a': a.net.W,
    'b': b.net.W,
    'c': c.net.W
})

t = Trainer(show_training=True, show_function=printer.show)

J_list, dJdy_list, J_test_list = t.learn(
    model=n,
    train=train,
    test=test,
    loss=SquaredLoss(),
    optimizer=GradientDescentMomentum(learning_rate=0.9, momentum=0.5),
    # batch_size=len(train),
    # optimizer=AdaGrad(learning_rate=0.6),
    epochs=epochs)

plt.figure(4)
plt.title('Errors History (J)')
plt.plot(np.array([x for (x, t) in test]),
         np.array([t for (x, t) in test]), color='red')
plt.plot(np.array([x for (x, t) in test]),
         np.array([n.forward(x) for (x, t) in test]), color='green')
# plt.ylim([0, 2])
plt.xlabel('x')
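# The two curves above show target (red) versus prediction (green); a scalar
# fit summary can complement them. A minimal sketch of the test-set mean
# squared error, computed directly with numpy rather than through
# SquaredLoss (whose exact reduction is not shown here):

mse = np.mean([(n.forward(x) - target) ** 2 for (x, target) in test])
print('Test MSE: ' + str(mse))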