observation, reward, done, _ = env.step(action) if done: break if __name__ == '__main__': env = gym.make('BipedalWalker-v2') env = gym.wrappers.Monitor(env, 'bipedalwalker', video_callable=lambda episode_id: True, force=True) env.seed(123) observation = env.reset() INPUT_SIZE = 24 HIDDEN_SIZE = 16 OUTPUT_SIZE = 4 rnn = RNN(10, 24, 12, OUTPUT_SIZE) # rnn.load('../../../models/bipedalwalker/09-23-2019_07-09_NN=RNNIndividual_POPSIZE=50_GEN=2000_PMUTATION_0' # '.3_PCROSSOVER_0.8.npy') # test_rnn() mlp = MLP(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE) # mlp.load("../../../models/bipedalwalker/09-07-2019_16-34-56_POPSIZE=30_GEN=5_MUTATION_0.609-07-2019_16-34" # "-56_POPSIZE=30_GEN=5_MUTATION_0.6.npy") # test_mlp() # Model 09-14-2019 NN: 24 - 32 - 12 - 4 # Model 09-21-2019 NN: 10 - 24 - 12 - 4 # Model 09-26-2019 NN: 5 - 16 - 12 - 4 # Model 09-30-2019 NN: 10 - 16 - 12 - 4 mlp_torch = MLPTorch(10, 16, 12, OUTPUT_SIZE) mlp_torch.load("../../../models/bipedalwalker/10-21-2019_02-57_NN=MLPTorchIndividual_POPSIZE=30_GEN"
print max_length ############### # Build Model # ############### # Symbolic variables x = T.tensor3(dtype=theano.config.floatX) y = T.imatrix() mask = T.ivector() # Construct RNN class classifier = RNN(input=x, n_in=INPUT_DIM, n_hidden=NEURONS_PER_LAYER, n_out=OUTPUT_DIM, n_layers=HIDDEN_LAYERS, n_total=max_length, batch=BATCH_SIZE, mask=mask) # Cost cost = classifier.negative_log_likelihood(y) + classifier.L2_sqr # Build Gradient dparams = [T.grad(cost, param) for param in classifier.params] # Build Train Model print "Building Train Model..." train_model = theano.function(inputs=[x, y, mask], outputs=cost, updates=Update(classifier.params, dparams,
def get_model(self, input_size, hidden_size, output_size) -> NeuralNetwork:
    """Construct and return the recurrent network for this experiment.

    The second hidden layer is hard-wired to 12 units — presumably to
    match the saved-model architectures used elsewhere in the project
    (TODO confirm against the training configs).
    """
    second_hidden_size = 12  # fixed width of the second hidden layer
    return RNN(input_size, hidden_size, second_hidden_size, output_size)