Example #1
def train_models(X_train, y_train, batchSize, model, loss_log):
    history = LossHistory()
    model.fit(X_train,
              y_train,
              batch_size=batchSize,
              nb_epoch=1,
              verbose=0,
              callbacks=[history])
    loss_log.append(history.losses)

    return loss_log
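
Every example on this page trains through a LossHistory callback that is not defined in the snippets themselves. A minimal sketch of such a callback, assuming the Keras 1.x API the examples use (model.fit with nb_epoch), could look like this:

import keras

class LossHistory(keras.callbacks.Callback):
    # Collect the per-batch training loss so callers can append it to loss_log.
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))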
Example #2
def train_net(model, params):

    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 100000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    min_distance = 10000
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state, _ = game_state.frame_step(2)

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        car_distance += 1

        # Choose an action. Note: the epsilon-greedy branch below is commented
        # out, so this example always takes the greedy (argmax) action.
        # if random.random() < epsilon or t < observe:
        #     action = np.random.randint(0, 3)  # random
        # else:
        #     # Get Q values for each action.
        qval = model.predict(state, batch_size=1)
        action = np.argmax(qval)  # best

        # Take action, observe new state and get our treat.
        reward, new_state, distance = game_state.frame_step(action)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)  # a list of batchSize tuples, each with 4 elements (S, A, R, S')

            # Get training values.
            X_train, y_train = process_minibatch2(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(
                X_train, y_train, batch_size=batchSize,
                nb_epoch=1, verbose=0, callbacks=[history]
            )
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1.0/train_frames)

        if distance < min_distance:
            min_distance = distance

        # We died, so update stuff.
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            # print("Max_car_distance: %d at %d\tepsilon %f\t(%d)\tdistance %d\t%f fps" %
            #       (max_car_distance, t, epsilon, car_distance, distance, fps))

            # Reset.
            car_distance = 0
            start_time = timeit.default_timer()
        if t % 10 == 0:
            print("Max_car_distance: %d at %d\tepsilon %f\t(%d)\tdistance %d \tmin_distance %d" %
                 (max_car_distance, t, epsilon, car_distance, distance, min_distance))

        # Save the model every 10,000 frames.
        if t % 10000 == 0:
            model.save_weights('saved-models/' + filename + '-' +
                               str(t) + '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))
        
    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log)
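
Example #2 turns the sampled (S, A, R, S') tuples into supervised targets with process_minibatch2, which is not shown here (a vectorized version appears in Example #12). A per-sample sketch of the Q-learning target it builds, assuming GAMMA is the discount factor and a reward of -500 marks a terminal crash as in the code above, could be:

import numpy as np

GAMMA = 0.9  # assumed discount factor

def process_minibatch_sketch(minibatch, model):
    # Readable, unvectorized version: one model.predict pair per transition.
    X_train, y_train = [], []
    for old_state, action, reward, new_state in minibatch:
        old_qval = model.predict(old_state, batch_size=1)
        new_qval = model.predict(new_state, batch_size=1)
        y = np.copy(old_qval)
        if reward == -500:          # terminal: no bootstrapping
            y[0][action] = reward
        else:                       # non-terminal: r + GAMMA * max_a' Q(s', a')
            y[0][action] = reward + GAMMA * np.max(new_qval)
        X_train.append(old_state.reshape(-1,))
        y_train.append(y.reshape(-1,))
    return np.array(X_train), np.array(y_train)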
Example #3
  reward = np.dot(train,weightReadings)
  reward = reward.astype(int)
  
  if trainCount > observe:
    # If we've stored enough in our buffer, pop the oldest.
    if len(replay) > buffer:
      replay.pop(0)
    
    # Randomly sample our experience replay memory
    minibatch = random.sample(replay, batchSize)
    
    # Get training values by Sarsa 0
    X_train, y_train = tv.sarsa0_minibatch(minibatch, model, sarsa0P)

    # Train the model on this batch.
    history = LossHistory()
    model.fit(
      X_train, y_train, batch_size=batchSize,
      nb_epoch=1, verbose=0, callbacks=[history]
    )
    loss_log.append(history.losses)

  state = new_state
  
  if epsilon > final_epsilon and trainCount > observe:
    epsilon -= (1/train_frames)
  print(epsilon)
  
  # Save the model every 25,000 frames.
  filename = 'train5'
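
Example #3 builds its targets with tv.sarsa0_minibatch (not shown), i.e. SARSA(0) rather than Q-learning: the bootstrap term uses the Q-value of the action actually taken in the next state instead of the maximum over actions. In sketch form, with GAMMA as the discount factor and next_action coming from the stored transition:

# SARSA(0) target (sketch; tv.sarsa0_minibatch itself is not shown):
#   Q-learning: y = r + GAMMA * np.max(model.predict(new_state, batch_size=1))
#   SARSA(0):   y = r + GAMMA * model.predict(new_state, batch_size=1)[0][next_action]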
Example #4
    def update_replay(self, reward, new_state, action=None):
        if action is None:
            action = self.lastAction

        # Experience replay storage.
        self.replay.append(
            (np.copy(self.old_state), action, reward, np.copy(new_state)))

        # If we're done observing, start training.
        if self.t > self.observe:
            # If we've stored enough in our buffer, pop the oldest.
            if len(self.replay) > self.buffer:
                self.replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(self.replay, self.batchSize)

            # Get training values.
            X_train, y_train = process_minibatch2(minibatch, self.model,
                                                  self.sequence_length,
                                                  self.end_value, self.GAMMA)

            # Train the model on this batch.
            history = LossHistory()
            self.model.fit(X_train,
                           y_train,
                           batch_size=self.batchSize,
                           nb_epoch=1,
                           verbose=0,
                           callbacks=[history])
            self.loss_log.append(history.losses)

            if self.t % self.save_every == 0:
                if len(self.data_collect) > 50:
                    # Save the results to a file so we can graph it later.
                    learn_f = 'results/command-frames/learn_data-' + self.filename + '.csv'
                    with open(learn_f, 'w', newline='') as data_dump:
                        wr = csv.writer(data_dump)
                        wr.writerows(self.data_collect)
                    plotting.plot_file(learn_f, 'learn')

                if len(self.loss_log) > 500:
                    loss_f = 'results/command-frames/loss_data-' + self.filename + '.csv'
                    with open(loss_f, 'w', newline='') as lf:
                        wr = csv.writer(lf)
                        for loss_item in self.loss_log:
                            wr.writerow(loss_item)

                    plotting.plot_file(loss_f, 'loss')

        # Update the starting state with S'.
        self.state = new_state

        # Decrement epsilon over time.
        if self.epsilon > 0.1 and self.t > self.observe:
            self.epsilon -= (1.0 / self.train_frames)

        # We died, so update stuff.
        if reward == -500:
            # Log the car's distance at this T.
            print([self.t, self.hacker_cmds])
            self.data_collect.append([self.t, self.hacker_cmds])

            # Update max.
            if self.hacker_cmds > self.max_hacker_cmds:
                self.max_hacker_cmds = self.hacker_cmds

            # Time it.
            tot_time = timeit.default_timer() - self.start_time
            fps = self.hacker_cmds / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (self.max_hacker_cmds, self.t, self.epsilon,
                   self.hacker_cmds, fps))

            # Reset.
            self.hacker_cmds = 0
            self.start_time = timeit.default_timer()

        # Save the model and replay buffer every save_every frames.
        if self.t % self.save_every == 0:
            pickle.dump(
                self.replay,
                open(self.save_replay_file_prefix + "-" + str(self.t), "wb"))
            model_save_filename = self.save_model_file_prefix + self.filename + '-' + str(
                self.t) + '.h5'
            self.model.save_weights(model_save_filename, overwrite=True)
            print("Saving model %s - %d" % (self.filename, self.t))
Example #5
def train(model, params):
    filename = params_to_filename(params)

    EPISODE = 10
    FRAMES = 4000
    OBSERVE = FRAMES * 3
    epsilon = 1
    batchSize = params['batchSize']
    buffer = params['buffer']
    replay = []
    minibatch = []
    total_frames = 0
    path_log = []
    loss_log = []

    # min_path_length = 0

    for m in range(EPISODE):
        print("Episode: %d" % (m))
        gameObject = GameClass(draw_screen=True, display_path=True, fps=FPS)

        # Choose no action in the initial frame
        action = 2
        reward, state = gameObject.frame_step(action)
        for t in range(FRAMES):
            total_frames += 1

            if t % (FRAMES // 10) == 0:
                print("Frames: %d" % (t))

            # Choose the action based on the epsilon greedy algorithm
            if (random.random() < epsilon
                    or total_frames < OBSERVE):  # choose random action
                action = np.random.randint(0, 3)
            else:  # choose best action from Q(s,a) values
                # Let's run our Q function on (state,action) to get Q values for all possible actions
                Q = np.zeros(3)
                for a in range(3):
                    features = get_features(state, a)
                    Q[a] = model.predict(features, batch_size=batchSize)
                action = np.argmax(Q)

            # Execute the action, observe new state and reward
            reward, state_new = gameObject.frame_step(action)
            path_length = gameObject.num_steps

            # Store the (state, action, reward, new state) pair in the replay
            memory = state, action, reward, state_new
            replay.append(memory)

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory if we have enough samples
            if total_frames > OBSERVE:
                minibatch = random.sample(replay, batchSize)

                # Process the minibatch to get the training data
                X_train, y_train = process_minibatch(minibatch, model,
                                                     batchSize)

                # Train the model on this batch.
                history = LossHistory()
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          verbose=0,
                          callbacks=[history])
                loss_log.append(history.losses)

                # Decrement epsilon over time.
                if epsilon > 0.1:
                    epsilon -= 1.0 / (FRAMES * EPISODE - OBSERVE)

            # Update the starting state with S'.
            state = state_new

            # Stop this episode if we achieved the goal
            if gameObject.check_reach_goal():
                # Log the robot's path length
                path_log.append([m, path_length])

                # # Update the min
                # if path_length < min_path_length:
                #     min_path_length = path_length

                # # Output some stuff so we can watch.
                # print("Min: %d \t epsilon %f\t(%d)" %
                #   (min_path_length, epsilon, path_length))

                # Stop this episode
                break

        # Save the model every episode after observation.
        if total_frames > OBSERVE:
            model.save('saved-models/model_nn-' + filename + '-' + str(m) +
                       '.h5',
                       overwrite=True)
            print("Saving model %s - %d" % (filename, m))

    # Log results after we're done all episodes.
    log_results(filename, path_log, loss_log, m)
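
For a sense of the epsilon schedule in Example #5: with FRAMES = 4000, EPISODE = 10 and OBSERVE = 3 * FRAMES = 12000, epsilon drops by 1 / (40000 - 12000) ≈ 3.6e-5 per trained frame, so annealing from 1 down to the 0.1 floor takes roughly 0.9 / 3.6e-5 ≈ 25,000 trained frames.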
Example #6
def train_net(model, params, mode='grid'):

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = TRAIN_FRAMES  # Number of frames to play.

    filename = params_to_filename(params, mode, train_frames)
    print(filename)

    if mode == 'lane_following':
        rate = 10  # Hz
        screen = pygame.display.set_mode((1300, 600))
        pygame.display.set_caption("mdeyo car sim")
        background = pygame.Surface(screen.get_size())
        background.fill((0, 0, 0))
        RED = (255, 0, 0)
        car = Car2(RED, 60, 385, screen, 100)
        road = CurvedRoad(1200, 60, 385, '45')
        car.constant_speed = True
        state = road.getState(car)
        print('state:', state)

    if mode == 'grid':
        # Create a new game instance.
        # game_state = carmunk.GameState()
        grid = Grid(X_DIM, Y_DIM)
        car = Car(grid, 0, 0)
        game_state = World(grid, car, 500, 10, False)
        # Get initial state by doing nothing and getting the state.
        _, state = game_state.updateState(0)

    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_reward = -999999
    car_reward = 0
    done = 0  # only set by the lane_following branch; initialized so 'grid' mode doesn't raise a NameError below
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S')
    loss_log = []

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1

        if mode == 'grid':
            # Choose an action.
            if random.random() < epsilon or t < observe:
                action = np.random.randint(0, 3)  # random
            else:
                # Get Q values for each action.
                qval = model.predict(state, batch_size=1)
                action = (np.argmax(qval))  # best

            # Take action, observe new state and get our treat.
            #reward, new_state = game_state.frame_step(action)
            car_reward, new_state = game_state.updateState(action)
            # car_reward = reward
            # print(reward)

        elif mode == 'lane_following':
            # Choose an action.
            if random.random() < epsilon or t < observe:
                action = np.random.randint(0, 3)  # random
                # actions currently are 0 = no input (drive straight)
                #                       1 = left turn input
                #                       2 = right turn input
            else:
                # Get Q values for each action.
                qval = model.predict(state, batch_size=1)
                action = (np.argmax(qval))  # best

            # Take action, observe new state and get our treat.
            # print(action)
            car.takeAction(action)
            car.update(1 / rate)
            road.plotRoad(screen)

            new_state = road.getState(car)
            (car_reward, done) = road.reward(car)

            # --- Go ahead and update the screen with what we've drawn.
            pygame.display.flip()

            # --- Limit to 60 frames per second
            # clock.tick(rate)
            # print(car_reward)

        # Experience replay storage.
        print(t, 'reward', car_reward)
        # print('state:', state, 'action', action, 'reward',
        #       car_reward, 'new_state', new_state)
        replay.append((state, action, car_reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      epochs=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # print(state)
        # game_state.grid.printGrid()
        # print(reward)

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1 / train_frames)

        # We died, so update stuff.
        if done == 1:
            # if reward > 0 or reward==-999:
            # Log the car's distance at this T.
            data_collect.append([t, car_reward])

            # Update max.
            if car_reward > max_car_reward:
                max_car_reward = car_reward

            # Time it.
            tot_time = timeit.default_timer() - start_time
            # fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t" %
                  (max_car_reward, t, epsilon, car_reward))

            # Reset.
            car_reward = 0
            start_time = timeit.default_timer()

        # Print progress every 100 frames.
        if t % 100 == 0:
            print(t)
        # Save the model every 2,000 frames.
        if t % 2000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    print(train_frames)
    log_results(filename, data_collect, loss_log, train_frames, observe)
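
Example #6 expects a compiled Keras model plus a params dict carrying at least the 'batchSize' and 'buffer' keys; params_to_filename, Grid, Car, World and TRAIN_FRAMES come from the surrounding project. A hypothetical driver (the model-building helper and the parameter values here are assumptions, not part of the original code) might look like:

params = {'batchSize': 64, 'buffer': 50000}
model = build_model()  # hypothetical: any compiled Keras regression net with 3 outputs
train_net(model, params, mode='grid')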
Example #7
def train_net(model, params):

    filename = params_to_filename(params)
    observe = 129  # Number of frames to observe before training.
    epsilon = 0.5
    train_frames = 50000  # Number of frames to play.
    steps = 0
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S'). #to be displayed

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state, _ = game_state.frame_step(2)

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.

    while t < train_frames:
        print(t)
        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 5)  # random
        else:
            # Get Q values for each action.
            print("PREDICTED", state)
            # time.sleep(1)
            x = state[0]
            y = state[1]
            qval = model.predict(np.array([x, y]).reshape((1, 2)),
                                 batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action, observe new state and get our treat.
        reward, new_state, term = game_state.frame_step(action)
        print("timestep :" + str(t) + "Reward" + str(reward) + "action" +
              str(action) + "state" + str(state))
        # Experience replay storage.
        replay.append((state, action, reward, new_state))
        # print(len(replay))
        # If we're done observing, start training.
        if t > observe:
            #print("start")
            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch2(minibatch, model)
            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)
            steps += 1
            if steps % 1000 == 0:
                print("Step = " + str(steps), "Epsilon = " + str(epsilon))
        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (10.0 / train_frames)
            print("EPSILON UPDATED", epsilon)

        # We died, so update stuff.
        if term == 1:
            # print("Crashed.")
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])
            # Reset.
            car_distance = 0
            continue
        # We reached the goal, so update stuff.
        elif term == 2:
            print("Reached goal.", car_distance)
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])
            # Reset.
            car_distance = 0
            continue

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))
        # if(keyboard.is_pressed('8')):
        #     print("Reset Goal")
        #     game_state.reset_goal()
        print(t, reward, action)
Example #8
def train_net(model, params):

    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 1002  # Number of frames to play.
    reward = 0
    death = 0
    printstuff = ''
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    max_reward = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunkStatic.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state, nothing = game_state.frame_step(2)

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 4)  # random 0-3
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action, observe new state and get our treat.
        reward, new_state, printstuff = game_state.frame_step(action)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1 / train_frames)

        #Update max
        if reward > max_reward:
            max_reward = reward

        # We died, so update stuff.
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))

            print("Max reward : %d", max_reward)

            # Reset.
            car_distance = 0
            start_time = timeit.default_timer()

            #update death
            death += 1
            if t > observe and death > 10:
                return
    print(printstuff)
Example #9
def train_net(best_action_model, params):

    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 500000  # Number of frames to play. was 1000000
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    cum_rwd = 0
    cum_rwd_read = 0
    cum_rwd_dist = 0
    cum_rwd_speed = 0

    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').
    save_init = True
    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    state, new_reward, cur_speed, _, _, _ = game_state.frame_step(
        START_ACTION, START_SPEED, START_DISTANCE)

    # frame_step returns state, reward, speed and the three reward components.
    #state = state_frames(state, np.array([[0, 0, 0, 0, 0, 0, 0]])) # zeroing distance readings
    #state = state_frames(state, np.zeros((1,NUM_SENSORS))) # zeroing distance readings

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        #time.sleep(0.5)

        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, NUM_OUTPUT)  # random
        else:
            # Get Q values for each action
            qval = best_action_model.predict(state, batch_size=1)
            # best_action_model was passed to this function. call it w/ current state
            action = (np.argmax(qval))  # best prediction

        # Take action, observe new state and get our treat.
        new_state, new_reward, new_speed, new_rwd_read, new_rwd_dist, new_rwd_speed = \
            game_state.frame_step(action, cur_speed, car_distance)

        # Use multiple frames.
        #new_state = state_frames(new_state, state) # seems this is appending 2-3 moves, results

        # Experience replay storage.
        replay.append((state, action, new_reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)
            # WHY RANDOM SAMPLE? COULD TRAINING BE SPED UP BY TAKING LAST BATCHSIZE

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, best_action_model)

            # Train the best_action_model on this batch.
            history = LossHistory()
            best_action_model.fit(X_train,
                                  y_train,
                                  batch_size=batchSize,
                                  nb_epoch=1,
                                  verbose=0,
                                  callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state
        cur_speed = new_speed
        cum_rwd += new_reward
        cum_rwd_read += new_rwd_read
        cum_rwd_dist += new_rwd_dist
        cum_rwd_speed += new_rwd_speed

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1 / train_frames)

        # We died, so update stuff.
        if new_reward == -500 or new_reward == -1000:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\t eps: %f\t dist: %d\t rwd: %d\t read: %d\t dist: %d\t speed: %d\t fps: %d" %
                  (max_car_distance, t, epsilon, car_distance, cum_rwd, \
                   cum_rwd_read, cum_rwd_dist, cum_rwd_speed, int(fps)))

            # Reset.
            car_distance = 0
            cum_rwd = 0
            cum_rwd_read = 0
            cum_rwd_dist = 0
            cum_rwd_speed = 0
            start_time = timeit.default_timer()

        # Save the best_action_model every 50,000 frames.
        if t % 50000 == 0:
            save_init = False
            best_action_model.save_weights('saved-best_action_models/' +
                                           filename + '-' + str(t) + '.h5',
                                           overwrite=True)
            print("Saving best_action_model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log)
Example #10
def train_net(model, params):

    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 300000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0

    #needed to print information
    global max_reward
    global stuff
    global b_state
    global max_qVal
    frame = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state, stuff = game_state.frame_step(2)

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        frame += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 4)  # random 0-1-2-3
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action, observe new state and get our treat.
        reward, new_state, somestuff = game_state.frame_step(action)
        if reward > max_reward:
            max_reward = reward
            stuff = somestuff
        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1 / train_frames)

        # We died, so update stuff.
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("\n\nMax distance: %d at %d\nepsilon %f\n(%d)\n%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))
            print("\n Max reward : %d\t,\n max qVal : %d\t" %
                  (max_reward, max_qVal))
            print('best state', b_state)
            print(stuff)
            print("\n frame:", frame)
            # Reset.
            max_reward = 0
            stuff = ''
            car_distance = 0
            max_qVal = 0
            b_state = [0, 0, 0, 0, 0, 0, 0, 0]

            start_time = timeit.default_timer()

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/BLE/final/' + 'FINAL' + filename +
                               '-' + str(t) + '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log)
Example #11
def train_net(model, params):

    filename = params_to_filename(params)

    train_frames = 300000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    t = 0
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = flappy.Game()
    game_state.init_elements()

    # Get initial state by doing nothing and getting the state.
    state, _ = game_state.frame_step(0)

    # Run the frames.
    while t < train_frames:

        t += 1

        # Choose an action.
        qval = model.predict(np.array([state]))[0]
        action = (np.argmax(qval))  # best
        if t % 500 == 0:
            print(qval)

        # Take action, observe new state and get our treat.
        new_state, reward = game_state.frame_step(action)
        if t % 1000 == 0:
            print(t, action, state, reward)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # Once we have more frames than a batch, start training.
        if t > batchSize:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        if reward == -1000:
            game_state.init_elements()
            state, _ = game_state.frame_step(0)

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('results/saved-models/' + filename + '-' +
                               str(t) + '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

        if t % 50000 == 0:
            # Log results after we're done all frames.
            log_results(filename, loss_log)
Example #12
	def train(self, state, simulator):
		self.t += 1
		if random.random() < self.epsilon or self.t < self.observe:
			action = np.random.randint(0, 4)
		else:
			# Get Q values for each action.
			qval = self.model.predict(state, batch_size=1)
			action = (np.argmax(qval)) 
		# Take action, observe new state and get our treat.
		simulator.applyAction(action)
		reward, new_state = simulator.statusVector()
		# Experience replay storage.
		self.replay.append((state, action, reward, new_state))
		if self.t > self.observe:
			# If we've stored enough in our buffer, pop the oldest.
			if len(self.replay) > self.buffer:
				self.replay.pop(0)
			# Randomly sample our experience replay memory
			minibatch = random.sample(self.replay, self.batchSize)
			# Get training values.
			X_train, y_train = process_minibatch2(minibatch, self.model)
			# Train the model on this batch.
			history = LossHistory()
			self.model.fit(
				X_train, y_train, batch_size=self.batchSize,
				nb_epoch=1, verbose=0, callbacks=[history]
			)
		# Decrement epsilon over time.
		if self.epsilon > 0.1 and self.t > self.observe:
			self.epsilon -= (1.0/self.train_frames)

		if self.t % 25000 == 0:
			self.model.save_weights('saved-models/' + self._filename + '-' +
							   str(self.t) + '.h5',
							   overwrite=True)
			print("Saving model %s - %d" % (self._filename, self.t))
		'''TODO need to change to class functions'''
		def process_minibatch2(minibatch, model):
			# by Microos, improve this batch processing function 
			#   and gain 50~60x faster speed (tested on GTX 1080)
			#   significantly increase the training FPS
			
			# instead of feeding data to the model one by one, 
			#   feed the whole batch is much more efficient

			mb_len = len(minibatch)

			old_states = np.zeros(shape=(mb_len, 5))
			actions = np.zeros(shape=(mb_len,))
			rewards = np.zeros(shape=(mb_len,))
			new_states = np.zeros(shape=(mb_len, 5))

			for i, m in enumerate(minibatch):
				old_state_m, action_m, reward_m, new_state_m = m
				old_states[i, :] = old_state_m[...]
				actions[i] = action_m
				rewards[i] = reward_m
				new_states[i, :] = new_state_m[...]

			old_qvals = model.predict(old_states, batch_size=mb_len)
			new_qvals = model.predict(new_states, batch_size=mb_len)

			maxQs = np.max(new_qvals, axis=1)
			y = old_qvals
			non_term_inds = np.where(rewards != -500)[0]
			term_inds = np.where(rewards == -500)[0]

			y[non_term_inds, actions[non_term_inds].astype(int)] = rewards[non_term_inds] + (GAMMA * maxQs[non_term_inds])
			y[term_inds, actions[term_inds].astype(int)] = rewards[term_inds]

			X_train = old_states
			y_train = y
			return X_train, y_train
Example #13
def train_net(turn_model, turn_model_30, turn_model_50, turn_model_70,
              avoid_model, acquire_model, acquire_model_30, acquire_model_50,
              acquire_model_70, hunt_model, pack_model, params):

    filename = params_to_filename(params)

    # Number of frames to observe before training (currently the same value for every mode).
    observe = 2000

    epsilon = 1  # vary this based on how much pre-learning has already occurred in the lower models
    train_frames = 750000  # number of flips for training
    batchSize = params['batchSize']
    buffer = params['buffer']

    # initialize variables and structures used below.
    max_crash_frame_ctr = 0
    crash_frame_ctr = 0
    total_frame_ctr = 0
    replay_frame_ctr = 0
    stop_ctr = 0
    avoid_ctr = 0
    acquire_ctr = 0
    cum_rwd = 0
    cum_speed = 0

    data_collect = []
    replay = []  # replay stores (state, action, reward, new state)
    loss_log = []
    save_init = True
    cur_speeds = []
    for i in range(NUM_DRONES):
        cur_speeds.append(START_SPEED)

    # initialize drone state holders
    turn_states = np.zeros(
        [NUM_DRONES, TURN_TOTAL_SENSORS * TURN_STATE_FRAMES])
    avoid_states = np.zeros(
        [NUM_DRONES, AVOID_TOTAL_SENSORS * AVOID_STATE_FRAMES])
    acquire_states = np.zeros(
        [NUM_DRONES, ACQUIRE_NUM_SENSOR * ACQUIRE_STATE_FRAMES])
    hunt_states = np.zeros(
        [NUM_DRONES, HUNT_TOTAL_SENSORS * HUNT_STATE_FRAMES])
    drone_states = np.zeros(
        [NUM_DRONES, DRONE_TOTAL_SENSOR * PACK_STATE_FRAMES])

    # create game instance
    game_state = carmunk.GameState()

    # get initial state(s)
    turn_state, avoid_state, acquire_state, hunt_state, drone_state, reward, cur_speed = \
        game_state.frame_step(START_DRONE_ID, START_TURN_ACTION, START_SPEED_ACTION,
                              START_PACK_ACTION, START_SPEED, START_DISTANCE, 1)

    # initialize frame states
    if cur_mode in [TURN, AVOID, HUNT, PACK]:

        for i in range(NUM_DRONES):
            turn_states[i] = state_frames(
                turn_state,
                np.zeros((1, TURN_TOTAL_SENSORS * TURN_STATE_FRAMES)),
                TURN_TOTAL_SENSORS, TURN_STATE_FRAMES)

        if cur_mode in [AVOID, HUNT, PACK]:

            for i in range(NUM_DRONES):
                avoid_states[i] = state_frames(
                    avoid_state,
                    np.zeros((1, AVOID_TOTAL_SENSORS * AVOID_STATE_FRAMES)),
                    AVOID_TOTAL_SENSORS, AVOID_STATE_FRAMES)

    if cur_mode in [ACQUIRE, HUNT, PACK]:

        for i in range(NUM_DRONES):
            acquire_states[i] = state_frames(
                acquire_state,
                np.zeros((1, ACQUIRE_NUM_SENSOR * ACQUIRE_STATE_FRAMES)),
                ACQUIRE_NUM_SENSOR, ACQUIRE_STATE_FRAMES)

    if cur_mode in [HUNT, PACK]:

        for i in range(NUM_DRONES):
            hunt_states[i] = state_frames(
                hunt_state,
                np.zeros((1, HUNT_TOTAL_SENSORS * HUNT_STATE_FRAMES)),
                HUNT_TOTAL_SENSORS, HUNT_STATE_FRAMES)

    if cur_mode == PACK:

        for i in range(NUM_DRONES):
            drone_states[i] = state_frames(
                drone_state,
                np.zeros((1, DRONE_TOTAL_SENSOR * PACK_STATE_FRAMES)),
                DRONE_TOTAL_SENSOR, PACK_STATE_FRAMES)

        pack_state = state_frames(
            drone_state, np.zeros((1, PACK_TOTAL_SENSORS * PACK_STATE_FRAMES)),
            PACK_TOTAL_SENSORS, PACK_STATE_FRAMES)

    # time it
    start_time = timeit.default_timer()

    # run frames
    while total_frame_ctr < train_frames:

        total_frame_ctr += 1  # counts total training distance traveled
        crash_frame_ctr += 1  # counts distance between crashes
        replay_frame_ctr += 1  # counts frames between pack mode replay captures

        # used to slow things down for de-bugging
        #time.sleep(0.25)

        for drone_id in range(
                NUM_DRONES):  # NUM_DRONES = 1, unless you're in PACK mode

            speed_action = START_SPEED_ACTION

            # choose appropriate action(s)
            # note: only generates random inputs for currently training model.
            # all prior (sub) models provide their best (fully-trained) inputs
            # epsilon degrades over flips...
            if random.random() < epsilon or total_frame_ctr < observe:
                if cur_mode == TURN:
                    turn_action = set_turn_action(
                        True, cur_speeds[drone_id],
                        np.array([turn_states[drone_id]]))
                else:
                    if cur_mode in [AVOID, HUNT, PACK]:
                        turn_action, turn_model = set_turn_action(
                            False, cur_speeds[drone_id],
                            np.array([turn_states[drone_id]]))

                    if cur_mode == AVOID:
                        speed_action = set_avoid_action(
                            True, turn_action,
                            np.array([avoid_states[drone_id]]))
                    else:
                        if cur_mode in [HUNT, PACK]:
                            speed_action = set_avoid_action(
                                False, turn_action,
                                np.array([avoid_states[drone_id]]))

                        if cur_mode == ACQUIRE:
                            acquire_action = set_acquire_action(
                                True, cur_speeds[drone_id],
                                np.array([acquire_states[drone_id, ]]))
                            turn_action = acquire_action
                        else:
                            acquire_action, acquire_model = set_acquire_action(
                                False, cur_speeds[drone_id],
                                np.array([acquire_states[drone_id, ]]))

                            if cur_mode == HUNT:
                                hunt_action, turn_action, speed_action = set_hunt_action(
                                    True, cur_speeds[drone_id], turn_action,
                                    speed_action, acquire_action,
                                    np.array([hunt_states[drone_id, ]]))
                            else:
                                hunt_action, turn_action, speed_action = set_hunt_action(
                                    False, cur_speeds[drone_id], turn_action,
                                    speed_action, acquire_action,
                                    np.array([hunt_states[drone_id, ]]))

                                if cur_mode == PACK and (
                                        total_frame_ctr == 1 or
                                    (replay_frame_ctr - 1) % PACK_EVAL_FRAMES
                                        == 0) and drone_id == 0:
                                    pack_action = set_pack_action(
                                        True, pack_state)
                                    # note: pack action only changed every PACK_EVAL_FRAMES.
                                    # for frames in between it's constant

            else:  # ...increasing use of predictions over time
                if cur_mode == TURN:
                    turn_action, turn_model = set_turn_action(
                        False, cur_speeds[drone_id],
                        np.array([turn_states[drone_id]]))
                else:
                    if cur_mode in [AVOID, HUNT, PACK]:
                        turn_action, turn_model = set_turn_action(
                            False, cur_speeds[drone_id],
                            np.array([turn_states[drone_id]]))

                    if cur_mode == AVOID:
                        speed_action = set_avoid_action(
                            False, turn_action,
                            np.array([avoid_states[drone_id]]))
                    else:
                        if cur_mode in [HUNT, PACK]:
                            speed_action = set_avoid_action(
                                False, turn_action,
                                np.array([avoid_states[drone_id]]))

                        if cur_mode == ACQUIRE:
                            acquire_action, acquire_model = set_acquire_action(
                                False, cur_speeds[drone_id],
                                np.array([acquire_states[drone_id, ]]))
                            turn_action = acquire_action
                        else:
                            acquire_action, acquire_model = set_acquire_action(
                                False, cur_speeds[drone_id],
                                np.array([acquire_states[drone_id, ]]))

                            if cur_mode == HUNT:
                                hunt_action, turn_action, speed_action = set_hunt_action(
                                    False, cur_speeds[drone_id], turn_action,
                                    speed_action, acquire_action,
                                    np.array([hunt_states[drone_id, ]]))
                            else:
                                hunt_action, turn_action, speed_action = set_hunt_action(
                                    False, cur_speeds[drone_id], turn_action,
                                    speed_action, acquire_action,
                                    np.array([hunt_states[drone_id, ]]))

                                if cur_mode == PACK and (
                                        total_frame_ctr == 1 or
                                    (replay_frame_ctr - 1) % PACK_EVAL_FRAMES
                                        == 0) and drone_id == 0:
                                    # get 1 pack action for each set of drones on first drone
                                    pack_action = set_pack_action(
                                        False, pack_state)
                                    print(pack_action)

            #print("++++++ pack action:", pack_action)
            #print(2)
            # pass action, receive new state, reward
            new_turn_state, new_avoid_state, new_acquire_state, new_hunt_state, new_drone_state, new_reward, new_speed = game_state.frame_step(
                drone_id, turn_action, speed_action, pack_action,
                cur_speeds[drone_id], total_frame_ctr, replay_frame_ctr)

            #print("********** 2. new states / rewards:")
            #print(total_frame_ctr)
            #print(drone_id)
            #print(new_drone_state)
            #print(new_reward)

            #print(3)
            # append (horizontally) historical states for learning speed.
            """ note: do this concatination even for models that are not learning (e.g., turn when running search or turn, search and acquire while running hunt) b/c their preds, performed above, expect the same multi-frame view that was in place when they trained."""

            if cur_mode in [TURN, AVOID, HUNT, PACK]:
                new_turn_state = state_frames(
                    new_turn_state, np.array([turn_states[drone_id]]),
                    TURN_TOTAL_SENSORS, TURN_STATE_FRAMES)

            if cur_mode in [AVOID, HUNT, PACK]:
                new_avoid_state = state_frames(
                    new_avoid_state, np.array([avoid_states[drone_id]]),
                    AVOID_TOTAL_SENSORS, AVOID_STATE_FRAMES)

            if cur_mode in [ACQUIRE, HUNT, PACK]:
                new_acquire_state = state_frames(
                    new_acquire_state, np.array([acquire_states[drone_id]]),
                    ACQUIRE_NUM_SENSOR, ACQUIRE_STATE_FRAMES)

            if cur_mode in [HUNT, PACK]:
                new_hunt_state = state_frames(
                    new_hunt_state, np.array([hunt_states[drone_id]]),
                    HUNT_TOTAL_SENSORS, HUNT_STATE_FRAMES)

            #print(4)
            if cur_mode == PACK and (total_frame_ctr == 1 or
                                     replay_frame_ctr % PACK_EVAL_FRAMES == 0):
                if drone_id == 0:  # for 1st drone, pack state = drone state
                    new_pack_state = new_drone_state
                    pack_rwd = new_reward

                else:  # otherwise, append drone record to prior drone state
                    new_pack_state = state_frames(new_pack_state,
                                                  new_drone_state,
                                                  DRONE_TOTAL_SENSOR, 2)
                    pack_rwd += new_reward

                new_drone_state = state_frames(
                    new_drone_state, np.array([drone_states[drone_id]]),
                    DRONE_TOTAL_SENSOR, PACK_STATE_FRAMES)

                if drone_id == (NUM_DRONES -
                                1):  # for last drone build pack record
                    if total_frame_ctr == 1:
                        pack_state = np.zeros(
                            (1, PACK_TOTAL_SENSORS * PACK_STATE_FRAMES))

                    new_pack_state = state_frames(
                        new_pack_state, pack_state, PACK_TOTAL_SENSORS,
                        PACK_STATE_FRAMES
                    )  #may need to add 1 to PACK_STATE_FRAMES

                    #print("**** 3. final pack reward:")
                    #print(pack_rwd)

            #print(5)
            # experience replay storage
            """note: only the model being trained requires event storage as it is stack that will be sampled for training below."""
            if cur_mode == TURN:
                replay.append((np.array([turn_states[drone_id]]), turn_action,
                               new_reward, new_turn_state))

            elif cur_mode == AVOID:
                replay.append((np.array([avoid_states[drone_id]]),
                               speed_action, new_reward, new_avoid_state))

            elif cur_mode == ACQUIRE:
                replay.append((np.array([acquire_states[drone_id]]),
                               turn_action, new_reward, new_acquire_state))

            elif cur_mode == HUNT:
                replay.append((np.array([hunt_states[drone_id]]), hunt_action,
                               new_reward, new_hunt_state))

            elif cur_mode == PACK and (total_frame_ctr == 1
                                       or replay_frame_ctr % PACK_EVAL_FRAMES
                                       == 0) and drone_id == (NUM_DRONES - 1):
                replay.append(
                    (pack_state, pack_action, pack_rwd, new_pack_state))
                #print(replay[-1])

            #print("6a")
            # If we're done observing, start training.
            if total_frame_ctr > observe and (
                    cur_mode != PACK or
                (replay_frame_ctr % PACK_EVAL_FRAMES == 0
                 and drone_id == (NUM_DRONES - 1))):

                # If we've stored enough in our buffer, pop the oldest.
                if len(replay) > buffer:
                    replay.pop(0)

                # Randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)

                if cur_mode == TURN:
                    # Get training values.
                    X_train, y_train = process_minibatch(
                        minibatch, turn_model, TURN_NUM_INPUT, TURN_NUM_OUTPUT)
                    history = LossHistory()
                    turn_model.fit(X_train,
                                   y_train,
                                   batch_size=batchSize,
                                   nb_epoch=1,
                                   verbose=0,
                                   callbacks=[history])

                elif cur_mode == AVOID:
                    X_train, y_train = process_minibatch(
                        minibatch, avoid_model, AVOID_NUM_INPUT,
                        AVOID_NUM_OUTPUT)
                    history = LossHistory()
                    avoid_model.fit(X_train,
                                    y_train,
                                    batch_size=batchSize,
                                    nb_epoch=1,
                                    verbose=0,
                                    callbacks=[history])

                elif cur_mode == ACQUIRE:
                    X_train, y_train = process_minibatch(
                        minibatch, acquire_model, ACQUIRE_NUM_INPUT,
                        ACQUIRE_NUM_OUTPUT)
                    history = LossHistory()
                    acquire_model.fit(X_train,
                                      y_train,
                                      batch_size=batchSize,
                                      nb_epoch=1,
                                      verbose=0,
                                      callbacks=[history])

                elif cur_mode == HUNT:
                    X_train, y_train = process_minibatch(
                        minibatch, hunt_model, HUNT_NUM_INPUT, HUNT_NUM_OUTPUT)
                    history = LossHistory()
                    hunt_model.fit(X_train,
                                   y_train,
                                   batch_size=batchSize,
                                   nb_epoch=1,
                                   verbose=0,
                                   callbacks=[history])

                elif cur_mode == PACK:
                    X_train, y_train = process_minibatch(
                        minibatch, pack_model, PACK_NUM_INPUT, PACK_NUM_OUTPUT)
                    history = LossHistory()
                    pack_model.fit(X_train,
                                   y_train,
                                   batch_size=batchSize,
                                   nb_epoch=1,
                                   verbose=0,
                                   callbacks=[history])

                loss_log.append(history.losses)

            # Update the starting state with S'.
            if cur_mode in [TURN, AVOID, HUNT, PACK]:
                turn_states[drone_id] = new_turn_state

            if cur_mode in [AVOID, HUNT, PACK]:
                avoid_states[drone_id] = new_avoid_state

            if cur_mode in [ACQUIRE, HUNT, PACK]:
                acquire_states[drone_id] = new_acquire_state

            if cur_mode in [HUNT, PACK]:
                hunt_states[drone_id] = new_hunt_state

            if cur_mode == PACK and (total_frame_ctr == 1 or
                                     replay_frame_ctr % PACK_EVAL_FRAMES == 0):
                drone_states[drone_id] = new_drone_state

                if drone_id == (NUM_DRONES - 1):
                    pack_state = new_pack_state
                    replay_frame_ctr = 0

            cur_speeds[drone_id] = new_speed
            cum_rwd += new_reward

            # in case of crash, report and initialize
            if new_reward == -500 or new_reward == -1000:
                # Log the car's distance at this T.
                data_collect.append([total_frame_ctr, crash_frame_ctr])

                # Update max.
                if crash_frame_ctr > max_crash_frame_ctr:
                    max_crash_frame_ctr = crash_frame_ctr

                # Time it.
                tot_time = timeit.default_timer() - start_time
                fps = crash_frame_ctr / tot_time

                # Output some stuff so we can watch.
                print(
                    "Max: %d at %d\t eps: %f\t dist: %d\t mode: %d\t cum rwd: %d\t fps: %d"
                    % (max_crash_frame_ctr, total_frame_ctr, epsilon,
                       crash_frame_ctr, cur_mode, cum_rwd, int(fps)))

                # Reset.
                crash_frame_ctr = cum_rwd = cum_speed = 0
                start_time = timeit.default_timer()

        # decrement epsilon for another frame
        if epsilon > 0.1 and total_frame_ctr > observe:
            epsilon -= (1.0 / train_frames)

        if total_frame_ctr % 10000 == 0:
            if crash_frame_ctr != 0:
                print(
                    "Max: %d at %d\t eps: %f\t dist: %d\t mode: %d\t cum rwd: %d"
                    % (max_crash_frame_ctr, total_frame_ctr, epsilon,
                       crash_frame_ctr, cur_mode, cum_rwd))

        # Save model every 50k frames
        if total_frame_ctr % 50000 == 0:
            save_init = False
            if cur_mode == TURN:
                turn_model.save_weights('models/turn/turn-' + filename + '-' +
                                        str(START_SPEED) + '-' +
                                        str(total_frame_ctr) + '.h5',
                                        overwrite=True)
                print("Saving turn_model %s - %d - %d" %
                      (filename, START_SPEED, total_frame_ctr))

            elif cur_mode == AVOID:
                avoid_model.save_weights('models/avoid/avoid-' + filename +
                                         '-' + str(total_frame_ctr) + '.h5',
                                         overwrite=True)
                print("Saving avoid_model %s - %d" %
                      (filename, total_frame_ctr))

            elif cur_mode == ACQUIRE:
                acquire_model.save_weights('models/acquire/acquire-' +
                                           filename + '-' + str(START_SPEED) +
                                           '-' + str(total_frame_ctr) + '.h5',
                                           overwrite=True)
                print("Saving acquire_model %s - %d" %
                      (filename, total_frame_ctr))

            elif cur_mode == HUNT:
                hunt_model.save_weights('models/hunt/hunt-' + filename + '-' +
                                        str(total_frame_ctr) + '.h5',
                                        overwrite=True)
                print("Saving hunt_model %s - %d" %
                      (filename, total_frame_ctr))

            elif cur_mode == PACK:
                pack_model.save_weights('models/pack/pack-' + filename + '-' +
                                        str(total_frame_ctr) + '.h5',
                                        overwrite=True)
                print("Saving pack_model %s - %d" %
                      (filename, total_frame_ctr))

    # Log results after we're done with all the frames.
    log_results(filename, data_collect, loss_log)
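

# The five per-mode branches above (TURN / AVOID / ACQUIRE / HUNT / PACK) repeat
# the same process_minibatch -> LossHistory -> fit -> loss_log pattern. A minimal
# sketch of a shared helper that could replace them (hypothetical name
# fit_on_minibatch; it assumes the process_minibatch signature and the
# LossHistory callback used above):
def fit_on_minibatch(net, minibatch, num_input, num_output, batchSize, loss_log):
    X_train, y_train = process_minibatch(minibatch, net, num_input, num_output)
    history = LossHistory()
    net.fit(X_train,
            y_train,
            batch_size=batchSize,
            nb_epoch=1,
            verbose=0,
            callbacks=[history])
    loss_log.append(history.losses)


# Usage, e.g. the TURN branch above would reduce to:
#     fit_on_minibatch(turn_model, minibatch, TURN_NUM_INPUT, TURN_NUM_OUTPUT,
#                      batchSize, loss_log)

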
def train_net(model, params):
    global counter
    global lastState
    global last_action
    global lastreward
    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 1000000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state = game_state.frame_step((2))
    # state = np.array([14,14,14,14,14,14,14,14,14])
    # state = np.expand_dims(state, axis = 0)
    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 5)  # random
        else:
            # Get Q values for each action.
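            # Note: train_new_state carried over from the previous frame is
            # effectively the same (previous state, current state, last action)
            # vector that train_state below rebuilds, so it is used as the
            # current model input here.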
            qval = model.predict(train_new_state, batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action, observe new state and get our treat.
        if lastreward < -100:
            lastState = state
        train_state = np.append(lastState, state[0])
        train_state = np.append(train_state, last_action)
        train_state = np.expand_dims(train_state, axis=0)

        reward, new_state = game_state.frame_step(action)
        train_new_state = np.append(state[0], new_state[0])
        train_new_state = np.append(train_new_state, action)

        train_new_state = np.expand_dims(train_new_state, axis=0)

        if sum(state[0]) >= 42:
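            # Presumably every sensor reads near its maximum here (nothing
            # detected), so only every 40th such uneventful frame is stored,
            # keeping the replay buffer from being dominated by "all clear"
            # states.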
            counter += 1
            if counter % 40 == 0:
                replay.append((train_state, action, reward, train_new_state))
                if counter > 1000000000:
                    counter = 0
        else:
            replay.append((train_state, action, reward, train_new_state))

        # Roll the states and action forward for the next frame.
        lastState = np.copy(state)
        state = np.copy(new_state)
        last_action = action
        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            batchSize1 = len(X_train)
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize1,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= 5 * (1.0 / train_frames)

        # We died, so update stuff.
        lastreward = reward
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))

            # Reset.
            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model every 10,000 frames.
        if t % 10000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done with all the frames.
    log_results(filename, data_collect, loss_log)
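

# process_minibatch is referenced above but not defined in this excerpt. A
# minimal sketch of the Q-learning target construction it presumably performs
# (the GAMMA value and the use of -500 as the terminal crash reward are
# assumptions taken from the surrounding code, not the original helper):
import numpy as np

GAMMA = 0.9  # assumed discount factor


def process_minibatch_sketch(minibatch, model):
    X_train, y_train = [], []
    for old_state, action, reward, new_state in minibatch:
        old_qval = model.predict(old_state, batch_size=1)
        new_qval = model.predict(new_state, batch_size=1)
        max_q = np.max(new_qval)
        target = old_qval.copy()
        if reward == -500:  # terminal frame: no bootstrapping
            target[0][action] = reward
        else:  # non-terminal: r + gamma * max_a' Q(s', a')
            target[0][action] = reward + GAMMA * max_q
        X_train.append(old_state.reshape(old_state.shape[1],))
        y_train.append(target.reshape(target.shape[1],))
    return np.array(X_train), np.array(y_train)
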
Example #15
0
def train_net(model, params):

    filename = params_to_filename(params)

    observe = 1000
    epsilon = 1
    train_frames = 100000
    batchSize = params['batchSize']
    buffer = params['buffer']
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []
    loss_log = []
    game_state = UI.GameState()
    _, state = game_state.frame_step((2))
    start_time = timeit.default_timer()
    while t < train_frames:
        t += 1
        car_distance += 1
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 3)
        else:
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))

        reward, new_state = game_state.frame_step(action)
        replay.append((state, action, reward, new_state))
        if t > observe:
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            X_train, y_train = process_minibatch2(minibatch, model)

            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        state = new_state

        if epsilon > 0.1 and t > observe:
            epsilon -= (1.0 / train_frames)

        if reward == -500:
            data_collect.append([t, car_distance])

            if car_distance > max_car_distance:
                max_car_distance = car_distance

            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))

            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    log_results(filename, data_collect, loss_log)
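

# LossHistory is used throughout these examples but not defined in this excerpt.
# It is presumably the standard Keras callback that records the loss of every
# batch; a minimal sketch matching the old Keras 1.x API (nb_epoch) used above:
from keras.callbacks import Callback


class LossHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        # append each batch's loss so train_net can log it via loss_log
        self.losses.append(logs.get('loss'))
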
Example #16
0
def train_net(model, params):

    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 110000  # Number of frames to play.
    steps = 0
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state = game_state.frame_step((1))

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(3)  # random
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action, observe new state and get our treat.
        reward, new_state = game_state.frame_step(action)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:
            #print("start")
            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch2(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)
            steps += 1
            if steps % 1000 == 0:
                print("Step = " + str(steps), "Epsilon = " + str(epsilon))
        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1.0 / train_frames)

        # We died, so update stuff.
        if reward <= -500:
            #print("Crashed.")
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Reset.
            car_distance = 0
        # We reached the goal, so update stuff.
        elif reward >= 2000:
            print("Reached goal.")
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Reset.
            car_distance = 0

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))