def main():
    """Run the Kuka diverse-object grasping env with a downward-biased
    random policy, forever, printing observations, actions, and the
    total reward of each episode.

    NOTE(review): relies on ``KukaDiverseObjectEnv`` and
    ``ContinuousDownwardBiasPolicy`` being imported elsewhere in the
    file (presumably from pybullet_envs) -- not visible in this chunk.
    """
    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()

    while True:  # run episodes until the process is killed
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render(mode='human')
            # .1 is presumably the exploration probability -- confirm
            # against ContinuousDownwardBiasPolicy.sample_action.
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            # BUG FIX: the sampled action was printed but discarded --
            # env.step was called with a constant zero action
            # [0, 0, 0, 0, 0], so the policy never actually acted.
            obs, rew, done, _ = env.step(act)
            episode_rew += rew
        print("Episode reward", episode_rew)
def main():
    """Run the Kuka diverse-object grasping env with a downward-biased
    random policy, forever, printing observations, actions, and the
    total reward of each episode.

    NOTE(review): this is a second definition of ``main`` -- at import
    time it silently shadows the earlier one. Consider renaming or
    deleting one of the two copies.
    """
    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()

    while True:  # run episodes until the process is killed
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
            # .1 is presumably the exploration probability -- confirm
            # against ContinuousDownwardBiasPolicy.sample_action.
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            # BUG FIX: the sampled action was printed but discarded --
            # env.step was called with a constant zero action
            # [0, 0, 0, 0, 0], so the policy never actually acted.
            obs, rew, done, _ = env.step(act)
            episode_rew += rew
        print("Episode reward", episode_rew)
# Example #3 (scrape artifact: original marker was "示例#3" followed by a stray "0")
        ep_reward_list = []
        avg_reward_list = []

    actor_loss, critic_loss = 0, 0
    best_score = -np.inf
    print('Main training loop')
    for episode in range(start_episode, MAX_EPISODES):
        obsv = env.reset()
        state = np.asarray(
            obsv, dtype=np.float32) / 255.0  # convert into float array
        episodic_reward = 0
        frames = []
        steps = 0
        while True:
            if episode > MAX_EPISODES - 3:
                frames.append(env.render(mode='rgb_array'))

            # take an action as per the policy
            if episode < RAND_EPS:  # explore for some episodes
                action = env.action_space.sample()
            else:
                action = agent.policy(state)

            # obtain next state and rewards

            next_obsv, reward, done, info = env.step(action)
            next_state = np.asarray(
                next_obsv,
                dtype=np.float32) / 255.0  # convert into float array

            #tb_img = np.reshape(next_state, (-1, 48, 48, 3))  # for tensorboard