def collect_episode(bc, env):
    """Roll out one full episode in ``env`` using ``bc`` to pick actions.

    The environment is reset, then stepped repeatedly with the action
    returned by ``bc.single_action`` until the environment reports ``done``.
    Every step is stored as a ``Transition`` in an ``Episode`` created with
    ``env.discount``.

    :param bc: BehaviorCloning; must provide ``single_action(state)``
        returning an ``(action, action_prob)`` pair
    :param env: OpenAI gym environment exposing ``reset()``, ``step(action)``
        (4-tuple return) and a ``discount`` attribute
    :return: the ``Episode`` holding every transition of the rollout
    """
    trajectory = Episode(env.discount)
    observation = env.reset()

    # At least one step is always taken; the loop ends on the step that
    # reports ``done``, after that final transition has been recorded.
    while True:
        chosen_action, chosen_prob = bc.single_action(observation)
        following_obs, reward, finished, _ = env.step(chosen_action)
        trajectory.insert(
            Transition(
                observation,
                chosen_action,
                chosen_prob,
                reward,
                following_obs,
                finished,
            )
        )
        observation = following_obs
        if finished:
            break

    return trajectory
def collect_episode(dqn, env, eps):
    """Roll out one full episode in ``env`` using ``dqn`` to pick actions.

    The environment is reset, then stepped repeatedly with the action
    returned by ``dqn.single_action(state, eps)`` until the environment
    reports ``done``. Every step is stored as a ``Transition`` in an
    ``Episode`` created with ``env.discount``.

    :param dqn: DQN; must provide ``single_action(state, eps)`` returning an
        ``(action, action_prob)`` pair
    :param env: OpenAI gym environment exposing ``reset()``, ``step(action)``
        (4-tuple return) and a ``discount`` attribute
    :param eps: rate of epsilon greedy exploration, forwarded unchanged to
        ``dqn.single_action`` on every step
    :return: the ``Episode`` holding every transition of the rollout
    """
    rollout = Episode(env.discount)
    current = env.reset()
    terminal = False

    while not terminal:
        picked, picked_prob = dqn.single_action(current, eps)
        upcoming, reward, terminal, _ = env.step(picked)
        # Record the step using the pre-step state, then advance to the
        # state returned by the environment.
        rollout.insert(
            Transition(current, picked, picked_prob, reward, upcoming, terminal)
        )
        current = upcoming

    return rollout