Example #1
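All five examples share a preamble that these snippets omit. A plausible set of imports, inferred from the names used below (a sketch, not the original file):

import importlib
import os

import numpy as np
import pybullet
import torch
from pybullet_utils import bullet_client as bc

# Frame_transfer, DMP, Frame_eval, and the TD3 variants are project-local
# modules; matplotlib/Axes3D and BertClient are only needed by Examples #3
# and #5. opt and device are module-level globals configured elsewhere.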
def main():
    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))
    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(img_path=os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id), 'epoch-0'),
                               frame_len=opt.cut_frame_num,
                               start_id=0,
                               memory_path=os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id), 'memory'),
                               class_label=opt.action_id,
                               opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)  # instantiate the class resolved above; eval was unnecessary

    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            # agent = TD3_embedding_nlp(state_dim, action_dim, max_action, env.log_root, opt)
            # agent = TD3_new(state_dim, action_dim, max_action, env.log_root, opt)
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root, opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)
    ep_r = 0

    if opt.mode == 'test':
        agent.load(2000)
        for i in range(opt.iteration):
            state = env.reset()
            for t in range(100):

                action = agent.select_action(state)
                next_state, reward, done, info = env.step(np.float32(action))
                # if opt.use_embedding:
                #     ep_r += reward[np.where (next_state[0] == 1)[0][0]]
                # else:
                #     ep_r += reward
                # env.render ()
                if done or t == 2000:  # t stays below 100 in this loop, so only done triggers
                    print("Ep_i \t{}, the ep_r is \t{:0.2f}, the step is \t{}".format(i, ep_r, t))
                    break
                state = next_state

    elif opt.mode == 'train':
        print ("====================================")
        print ("Collection Experience...")
        print ("====================================")
        # if opt.load: agent.load()

        buffer_root = '/scr1/system/gamma-robot/scripts/utils/buffer/211'
        action_all = np.load(os.path.join(buffer_root, 'action_all.npy'))
        target_all = np.load(os.path.join(buffer_root, 'target_all.npy'))
        rank_all = np.load(os.path.join(buffer_root, 'rank_all.npy'))
        reward_all = np.load(os.path.join(buffer_root, 'reward_all.npy'))

        for i in range(opt.num_iteration):
            if i < 4000:
                continue  # skip the first 4000 logged entries
            target = target_all[i]
            state = env.reset(target)

            # file = os.path.join(opt.project_root,'logs','tds_log','test{}'.format(211),'epoch-{}'.format(i))
            # log_reader = open(file,'r')
            # for line in log_reader.readlines():
            #     line = line.strip().split(':')
            #     if line[0]=='target':
            #         target = int(line[1])
            #     if line[0]=='action':

            action = action_all[i]

            print('epoch id:{}, action:{}'.format(i, str(action)))
            next_state, reward, done, info = env.step(action)

            print(reward, reward_all[i], rank_all[i])

    else:
        raise ValueError("unknown mode: {}".format(opt.mode))
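
Each example reads its configuration from a module-level opt namespace that the snippets never build. A minimal argparse sketch covering some of the attributes used above (the parser, flag spellings, and defaults are assumptions; only the attribute names come from the code):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--action_id', type=int, default=0)    # selects Envs.env_<id>.Engine<id>
parser.add_argument('--mode', type=str, default='train')   # 'train' or 'test'
parser.add_argument('--test_id', type=int, default=0)      # suffix of the td3_log test directory
parser.add_argument('--project_root', type=str, default='.')
parser.add_argument('--gui', action='store_true')          # PyBullet GUI vs. headless DIRECT
parser.add_argument('--use_cycle', action='store_true')
parser.add_argument('--use_dmp', action='store_true')
parser.add_argument('--video_reward', action='store_true')
parser.add_argument('--use_embedding', action='store_true')
parser.add_argument('--nlp_embedding', action='store_true')
opt = parser.parse_args()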
Example #2
def main():
    assert opt.mode == 'test'
    opt.test_id = 8888

    Engine_module = importlib.import_module('Envs.env_{}'.format(
        opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))
    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root,
                                 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)  # instantiate the class resolved above; eval was unnecessary

    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            # agent = TD3_embedding_nlp(state_dim, action_dim, max_action, env.log_root, opt)
            # agent = TD3_new(state_dim, action_dim, max_action, env.log_root, opt)
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root,
                              opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action,
                                  env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    if opt.mode == 'test':
        weight_id = 1400
        test_file = open(
            os.path.join(test_path, 'test_{}.txt'.format(weight_id)), 'w')
        # agent.load (4400)
        agent.load(weight_id)

        for target in opt.embedding_list:
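            # one evaluation episode per target task; the loaded embedding's reward is logged below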
            state = env.reset(target)
            action = agent.select_action(state)
            action = action.clip(-max_action, max_action)
            next_state, reward, done, info = env.step(action)

            reward_id = np.where(
                np.array(env.opt.embedding_list) ==
                env.opt.load_embedding)[0][0]
            test_file.write('{}\n'.format(reward[reward_id]))
            print(action)

    else:
        raise ValueError("unknown mode: {}".format(opt.mode))
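
Example #2 writes one scalar reward per line to test_<weight_id>.txt. A small standalone helper for summarizing such a file afterwards (hypothetical, not part of the original repository):

import numpy as np

def summarize_rewards(path):
    # one float per line -> count and mean reward across evaluated targets
    rewards = np.loadtxt(path)
    print('{} targets, mean reward {:.3f}'.format(rewards.size, rewards.mean()))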
Example #3
def show_one_policy():
    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))
    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(img_path=os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id), 'epoch-0'),
                               frame_len=opt.cut_frame_num,
                               start_id=0,
                               memory_path=os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id), 'memory'),
                               class_label=opt.action_id,
                               opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)  # instantiate the class resolved above; eval was unnecessary

    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]

    if opt.use_embedding:
        agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)


    assert opt.mode == 'test'
    agent.load(2000)
    state = env.reset()

    inter_n = 10.  # grid resolution for the state sweep below

    fig = plt.figure()
    ax = Axes3D(fig)  # note: modern matplotlib prefers fig.add_subplot(projection='3d')
    # ax.view_init(elev=45,azim=0)
    X = np.arange(0, 1 + 1/inter_n, 1/inter_n)
    Y = np.arange(0, 1 + 1/inter_n, 1/inter_n)
    X, Y = np.meshgrid(X, Y)
    R = np.sqrt(X ** 2 + Y ** 2)
    Z = np.sin(R)  # placeholder values; every cell is overwritten by the sweep below


    # sweep a 2D grid of state values; for each cell, record where the policy's
    # action would move the object in camera space
    for i in range(int(inter_n + 1)):
        for j in range(int(inter_n + 1)):
            state[0] = np.array([i/inter_n, 0, 0, j/inter_n])
            action = agent.select_action(state)
            obj = env.p.getAABB(env.obj_id, -1)
            obj_center = [(x + y) * 0.5 for x, y in zip(obj[0], obj[1])]
            world_pos = [(x + y) for x, y in zip(obj_center, action)]
            world_pos.append(1)  # homogeneous coordinate
            # PyBullet view matrices are column-major, hence the reshape + transpose
            camera_pos = np.array(env.view_matrix).reshape(4, -1).T.dot(np.array(world_pos))
            camera_pos = [x / camera_pos[-1] for x in camera_pos]

            print((i, j), camera_pos)
            Z[i][j] = camera_pos[1]

    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
    plt.show()

    plt.cla()
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.view_init(elev=45, azim=90)
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
    plt.show()
Example #4
def main():
    Engine_module = importlib.import_module('Envs.env_{}'.format(
        opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))
    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root,
                                 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)  # instantiate the class resolved above; eval was unnecessary

    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            # agent = TD3_embedding_nlp(state_dim, action_dim, max_action, env.log_root, opt)
            # agent = TD3_new(state_dim, action_dim, max_action, env.log_root, opt)
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root,
                              opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action,
                                  env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)
    ep_r = 0

    if opt.mode == 'test':
        agent.load(2000)
        for i in range(opt.iteration):
            state = env.reset()
            for t in range(100):

                action = agent.select_action(state)
                next_state, reward, done, info = env.step(np.float32(action))
                # if opt.use_embedding:
                #     ep_r += reward[np.where (next_state[0] == 1)[0][0]]
                # else:
                #     ep_r += reward
                # env.render ()
                if done or t == 2000:  # t stays below 100 in this loop, so only done triggers
                    print("Ep_i \t{}, the ep_r is \t{:0.2f}, the step is \t{}".
                          format(i, ep_r, t))
                    break
                state = next_state

    elif opt.mode == 'train':
        print("====================================")
        print("Collection Experience...")
        print("====================================")
        if opt.load:
            agent.load(7000)

        if opt.add_buffer:
            # add exploration data into buffer
            buffer_path = os.path.join(opt.project_root, 'scripts', 'utils',
                                       'buffer', 'buffer-100k')
            all_action = np.load(os.path.join(buffer_path, 'action_all.npy'))
            all_reward = np.load(os.path.join(buffer_path, 'reward_all.npy'))
            all_embedding = np.load(
                os.path.join(buffer_path, 'embedding_all.npy'))
            all_target = np.load(os.path.join(buffer_path, 'target_all.npy'))
            all_rank = np.load(os.path.join(buffer_path, 'rank_all.npy'))
            task_state = np.load(os.path.join(buffer_path, 'state.npy'))

            for i in range(all_action.shape[0]):
                if all_target[i] not in opt.fine_tune_list:
                    continue
                print('add buffer data:{}'.format(i))
                state = (all_embedding[i], task_state[all_target[i]])
                # logged interactions are single-step episodes, so next_state matches state
                next_state = (all_embedding[i], task_state[all_target[i]])
                action = all_action[i]
                reward = all_reward[i]
                done = True
                agent.memory.push(
                    (state, next_state, action, reward, float(done)))

                reward_id = np.where(
                    np.array(env.opt.embedding_list) == all_target[i])[0][0]
                ep_r = reward[reward_id]

                if ep_r > 0:
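                    # oversample: push four extra copies of positive-reward transitions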
                    for push_t in range(4):
                        agent.memory.push((state, next_state, action, reward,
                                           float(done)))

        for i in range(opt.num_iteration):
            state = env.reset()
            for t in range(2000):

                action = agent.select_action(state)
                action = action + np.random.normal(
                    0, max_action * opt.noise_level, size=action.shape)
                action = action.clip(-max_action, max_action)

                print('epoch id:{}, action:{}'.format(i, str(action)))
                next_state, reward, done, info = env.step(action)

                if opt.use_embedding:
                    reward_id = np.where(
                        np.array(env.opt.embedding_list) ==
                        env.opt.load_embedding)[0][0]
                    ep_r += reward[reward_id]
                else:
                    ep_r += reward

                # if opt.render and i >= opt.render_interval : env.render()
                agent.memory.push(
                    (state, next_state, action, reward, float(done)))

                if ep_r > 0:
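                    # same positive-reward oversampling as in the buffer pre-fill above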
                    for push_t in range(4):
                        agent.memory.push((state, next_state, action, reward,
                                           float(done)))

                if (i + 1) % 10 == 0:
                    print('Episode {},  The memory size is {} '.format(
                        i, len(agent.memory.storage)))
                if len(agent.memory.storage) >= opt.start_train - 1:
                    agent.update(opt.update_time)
                    opt.noise_level = opt.noise_training_level

                state = next_state
                if done or t == opt.max_episode - 1:
                    agent.writer.add_scalar('ep_r', ep_r, global_step=i)
                    if i % opt.print_log == 0:
                        print(
                            "Ep_i \t{}, the ep_r is \t{:0.2f}, the step is \t{}"
                            .format(i, ep_r, t))
                    ep_r = 0
                    break

            if i % opt.log_interval == 0:
                agent.save(i)

    else:
        raise ValueError("unknown mode: {}".format(opt.mode))
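
The TD3 classes are not shown in these snippets, but the training loop above only relies on agent.memory exposing push() and a storage list. A minimal replay buffer compatible with those calls might look like this (a sketch under assumed names, not the repository's implementation):

import random

class ReplayBuffer:
    def __init__(self, max_size=int(1e6)):
        self.storage = []  # (state, next_state, action, reward, done) tuples
        self.max_size = max_size

    def push(self, transition):
        # evict the oldest transition once the buffer is full
        if len(self.storage) >= self.max_size:
            self.storage.pop(0)
        self.storage.append(transition)

    def sample(self, batch_size):
        # uniform random minibatch for the TD3 update step
        return random.sample(self.storage, batch_size)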
Example #5
def analysis_nlp_embedding(cmd='move sth down'):
    assert opt.mode == 'test'
    opt.test_id = 8888

    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))
    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id), 'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id), 'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)  # instantiate the class resolved above; eval was unnecessary

    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            # agent = TD3_embedding_nlp(state_dim, action_dim, max_action, env.log_root, opt)
            # agent = TD3_new(state_dim, action_dim, max_action, env.log_root, opt)
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root, opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    agent.load(1400)
    state = env.reset()
    # bert-as-service client; assumes a BERT server is already running on these ports
    bert_engine = BertClient(port=5575, port_out=5576)

    for i in range(100):
        cmd = 'moving something close to something'  # overrides the cmd argument above
        target = 42
        state = env.reset(target=target)
        embedding = bert_engine.encode([cmd])
        state[0] = embedding[0]  # replace the task embedding with the BERT sentence embedding
        action = agent.select_action(state)
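
Example #5 needs a bert-as-service server listening on the ports passed to BertClient; encode() then maps a list of sentences to one fixed-size vector each. A quick standalone check (assumes such a server is already running):

from bert_serving.client import BertClient

client = BertClient(port=5575, port_out=5576)
vec = client.encode(['moving something close to something'])
print(vec.shape)  # e.g. (1, 768) against a BERT-base server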