import random
import timeit

import numpy as np

import carmunk
import nn  # companion module referenced below (nn.LossHistory)


def play(model):
    global lastState
    global lastaction
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))
    train_state = np.append(lastState, state[0])
    train_state = np.append(train_state, lastaction)
    train_state = np.expand_dims(train_state, axis=0)

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(train_state, batch_size=1))
        print(action)

        # Take action.
        _, state = game_state.frame_step(action)
        train_state = np.append(lastState, state[0])
        train_state = np.append(train_state, action)
        train_state = np.expand_dims(train_state, axis=0)
        lastState = state[0]

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

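# The variant above reads the module-level globals lastState and lastaction
# before ever assigning them, so they must be primed before calling it. A
# minimal setup sketch, assuming the readings in state[0] form a fixed-length
# sensor vector (NUM_SENSORS is a hypothetical constant, not in the original):
NUM_SENSORS = 3  # assumption: three sensor readings per frame
lastState = np.zeros(NUM_SENSORS)
lastaction = 2  # matches the "do nothing" action used for the initial frame
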
def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Move.
    while True:
        car_distance += 1

        # Choose action: 40% random exploration, otherwise greedy on Q values.
        if random.random() < 0.4:
            action = np.random.randint(0, 3)  # random
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = np.argmax(qval)  # best

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

def play(model): """ DOCSTRING """ car_distance = 0 game_state = carmunk.GameState() _, state = game_state.frame_step((2)) while True: car_distance += 1 action = (numpy.argmax(model.predict(state, batch_size=1))) _, state = game_state.frame_step(action) if car_distance % 1000 == 0: print("Current distance: %d frames." % car_distance)
def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

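# A minimal usage sketch for the play() variants above. neural_net's
# signature (input width, hidden-layer sizes, weights path) and the
# saved-weights filename are assumptions about the surrounding project,
# not shown in this file:
if __name__ == "__main__":
    from nn import neural_net  # assumed helper that builds/loads the Keras model

    saved_model = 'saved-models/164-150-100-50000.h5'  # hypothetical path
    model = neural_net(NUM_SENSORS, [164, 150], saved_model)
    play(model)
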
def train_net(model, params): """ DOCSTRING """ filename = params_to_filename(params) observe = 1000 # Number of frames to observe before training. epsilon = 1 train_frames = 1000000 # Number of frames to play. batchSize = params['batchSize'] buffer = params['buffer'] max_car_distance = 0 car_distance = 0 t = 0 data_collect = [] replay = [] # stores tuples of (S, A, R, S'). loss_log = [] game_state = carmunk.GameState() _, state = game_state.frame_step((2)) start_time = timeit.default_timer() while t < train_frames: t += 1 car_distance += 1 if random.random() < epsilon or t < observe: action = np.random.randint(0, 3) else: qval = model.predict(state, batch_size=1) action = (np.argmax(qval)) # best reward, new_state = game_state.frame_step(action) replay.append((state, action, reward, new_state)) if t > observe: if len(replay) > buffer: replay.pop(0) minibatch = random.sample(replay, batchSize) X_train, y_train = process_minibatch(minibatch, model) history = nn.LossHistory() model.fit(X_train, y_train, batch_size=batchSize, nb_epoch=1, verbose=0, callbacks=[history]) loss_log.append(history.losses) state = new_state if epsilon > 0.1 and t > observe: epsilon -= (1 / train_frames) if reward == -500: data_collect.append([t, car_distance]) if car_distance > max_car_distance: max_car_distance = car_distance tot_time = timeit.default_timer() - start_time fps = car_distance / tot_time print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" % (max_car_distance, t, epsilon, car_distance, fps)) car_distance = 0 start_time = timeit.default_timer() if t % 25000 == 0: model.save_weights('saved-models/' + filename + '-' + str(t) + '.h5', overwrite=True) print("Saving model %s - %d" % (filename, t)) log_results(filename, data_collect, loss_log)
def train_net(model, params):
    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 100000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').
    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state = game_state.frame_step((2))

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:
        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 3)  # random
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = np.argmax(qval)  # best

        # Take action, observe new state and get our treat.
        reward, new_state = game_state.frame_step(action)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:
            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory.
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch2(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train, y_train, batch_size=batchSize,
                      nb_epoch=1, verbose=0, callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1.0 / train_frames)

        # We died, so update stuff.
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))

            # Reset.
            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' +
                               str(t) + '.h5', overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log)

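# LossHistory, params_to_filename, and log_results are referenced above but
# live elsewhere in the project. The sketches below are plausible minimal
# implementations under that assumption; the CSV paths and the fields encoded
# into the filename are guesses, not from the original.
import csv

from keras.callbacks import Callback


class LossHistory(Callback):
    """Keras callback that records the loss of every trained batch."""

    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))


def params_to_filename(params):
    # Only batchSize and buffer are read in train_net; the real helper may
    # also encode the network's hidden-layer sizes.
    return str(params['batchSize']) + '-' + str(params['buffer'])


def log_results(filename, data_collect, loss_log):
    # Dump per-death distances and per-batch losses to CSV for later plotting.
    with open('results/learn_data-' + filename + '.csv', 'w') as f:
        csv.writer(f).writerows(data_collect)
    with open('results/loss_data-' + filename + '.csv', 'w') as f:
        wr = csv.writer(f)
        for loss_item in loss_log:
            wr.writerow(loss_item)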