# Assumed imports: the page shows the snippet without them. KukaDiverseObjectEnv
# ships with pybullet; ContinuousDownwardBiasPolicy is sketched below.
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv


def main():

    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()

    while True:
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render(mode='human')
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            # step the environment with the sampled action
            obs, rew, done, _ = env.step(act)
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #3
        episodic_reward = 0
        frames = []
        steps = 0
        while True:
            if episode > MAX_EPISODES - 3:
                frames.append(env.render(mode='rgb_array'))

            # take an action as per the policy
            if episode < RAND_EPS:  # explore for some episodes
                action = env.action_space.sample()
            else:
                action = agent.policy(state)

            # obtain the next state and reward from the environment
            next_obsv, reward, done, info = env.step(action)
            next_state = np.asarray(next_obsv, dtype=np.float32) / 255.0  # scale pixels to [0, 1]

            tb_img = np.reshape(next_state, (-1,) + state_size)  # add batch dim for TensorBoard

            with train_summary_writer.as_default():
                tf.summary.image("Training Image", tb_img, step=episode)
                tf.summary.histogram("action_vector", action, step=steps)

            episodic_reward += reward

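The loop above references several names defined outside the excerpt (MAX_EPISODES, RAND_EPS, state_size, agent, train_summary_writer). A minimal sketch of the surrounding setup, with assumed values, so the tf.summary calls resolve; the 48x48x3 observation shape follows the reshape used in the snippet:

import tensorflow as tf

# Hypothetical configuration; the excerpt does not show these definitions,
# so the values here are assumptions.
MAX_EPISODES = 50          # total training episodes
RAND_EPS = 10              # initial episodes of pure random exploration
state_size = (48, 48, 3)   # HWC shape of the normalized image observation

# TF2 summary writer backing the tf.summary.image/histogram calls above
train_summary_writer = tf.summary.create_file_writer("logs/train")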