Example #1
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)
    house_id = params.house_id

    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0

    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params,
                 lock, count)

        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())

            start_time = time.time()
            best_rate, save_model_index = testing(lock, n_update, gpu_id,
                                                  Agent, task, best_rate,
                                                  params, save_model_index,
                                                  start_time, logging,
                                                  house_id)
Example #2
def train_dagger_agent(expert_policy_file, config):
    # Get the actions that the expert would do given the observations
    expert = load_policy(expert_policy_file)

    agent, train_losses, test_losses, dataset = train_bc_agent(
        expert_policy_file, config)

    for i in range(config.num_dagger_iterations):
        # Get observations using the policy we just trained
        _, observations, _ = run_agent(agent_wrapper(agent, config), config)
        actions = [expert(obs[None, :]) for obs in observations]

        # Create a new training set that uses all the data
        observations = torch.Tensor(observations).to(config.device)
        actions = torch.Tensor(actions).squeeze(1).to(config.device)

        dagger_data = data.TensorDataset(observations, actions)
        dataset = data.ConcatDataset([dataset, dagger_data])

        N = len(dataset)
        train_length = int(config.split * N)
        test_length = N - train_length

        trainset, valset = data.random_split(dataset,
                                             [train_length, test_length])

        train_loader = data.DataLoader(trainset, batch_size=128, shuffle=True)
        test_loader = data.DataLoader(valset, batch_size=128)

        train_loss, test_loss = train_epochs(agent,
                                             train_loader,
                                             test_loader,
                                             epochs=config.epochs,
                                             lr=config.lr)

        train_losses += train_loss
        test_losses += test_loss

    return agent, train_losses, test_losses
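A hypothetical call sketch for train_dagger_agent. Only the config fields read in the body above are known; the expert checkpoint path and any fields consumed by train_bc_agent, run_agent, and agent_wrapper are placeholders:

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    num_dagger_iterations=5,  # DAgger relabel/retrain rounds
    split=0.9,                # train/validation split used above
    epochs=10,
    lr=1e-3,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    # ...plus whatever train_bc_agent, run_agent, and agent_wrapper expect
)
agent, train_losses, test_losses = train_dagger_agent(
    'path/to/expert_policy.pkl', config)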
Example #3
def learning(params, state_Queue, action_done, actions, reward_Queue):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    n_process = params.n_process

    n_inference = 0
    n_update = 0

    Agent = run_agent(params)
    # Agent.initialize()

    list_dones = [False] * n_process

    n_result = 0
    test_done = [0] * n_process
    test_succ = [0] * n_process

    best_rate = 0

    start_time = time.time()
    while True:
        list_obs, list_target, list_dones = getState(n_process, state_Queue,
                                                     list_dones)
        # do inference and make action
        # actions.value = [random.randrange(len(action_list))] * n_process
        inference, value, log_prob, entropy = Agent.action_train(
            list_obs, list_target)
        n_inference += 1

        actions = putActions(n_process, inference, actions, state_Queue)

        torch_rewards, list_dones, masks = getReward(n_process, reward_Queue,
                                                     list_dones)

        Agent.insert(log_prob, value, torch_rewards, masks, entropy)

        if n_inference % 30 == 0 and n_inference != 0:
            next_obs = list_obs, list_target
            # Agent.update(params, next_obs, list_dones, entropy)
            Agent.update_sync(params, next_obs)
            n_update += 1

            if n_update % 200 == 0:  # test
                actions, test_done, test_succ, list_rewards = EnvReset(
                    n_process, state_Queue, action_done, list_dones, actions)

                endTest = False
                while True:
                    list_obs, list_target, list_dones = getState(
                        n_process, state_Queue, list_dones)

                    if endTest is True:
                        best_rate = writeResult(n_process, test_done,
                                                test_succ, best_rate, Agent,
                                                params, n_update, start_time,
                                                logging)
                        actions, test_done, test_succ = callReset(
                            n_process, actions, state_Queue)
                        break

                    # do inference and make action
                    inference = Agent.action_test(list_obs, list_target)
                    putActions(n_process, inference, actions, state_Queue)

                    for i in range(n_process):
                        rank, done, reward = reward_Queue.get()
                        list_rewards[rank] = [reward]
                        list_dones[rank] = True

                        if done:
                            if test_done[rank] < params.n_eval:
                                test_done[rank] += 1
                                if reward == 10:  # success
                                    test_succ[rank] += 1

                    for i in range(n_process):
                        reward_Queue.task_done()

                    endTest = DoneOrNot(n_process, test_done, params.n_eval)
Example #4
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(),
                               lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        #optimizer.share_memory()
    else:
        optimizer = shared_optimizer

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    for episode in range(params.max_episode):
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target

        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0

        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(
                observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])

            rew = np.clip(reward, -1.0, 1.0)

            Agent.put_reward(rew, entropy, value, log_prob)
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer,
                               params)

            if done:
                break
Example #5
def test(rank, params, shared_model, count, lock):
    logging.basicConfig(filename='./2blocks_rew.log', level=logging.INFO)
    ptitle('Test Process: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    env = Env(True, 1, down_period=2)

    # model = A3C()
    model = A3C_LSTM()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    agent = run_agent(model, gpu_id)

    episode = 0
    while episode <= params.episode_test:
        env.reset()
        with lock:
            n_update = count.value
        agent.synchronize(shared_model)

        num_steps = 0
        accumulated_reward = 0
        nAction = 0
        line1 = 0
        line2 = 0
        line3 = 0
        line4 = 0
        nMove = 0
        rew_height = 0
        rew_move = 0

        while True:
            num_steps += 1

            obs = pre_processing(env.shadow_map,
                                 env._get_curr_block_pos())  # env.map
            action = agent.action_test(obs)
            if action == 5:
                action = 100000
            rew, shadow_reward, done, putting, height = env.step(
                action)  # what is the 'is_new_block'?
            if rew == 0.0 and action != 3 and action != 4:
                nMove += 1
                if nMove < 6:
                    rew_move += 0.2
                if putting:
                    rew_height += -(height / 20.0)
                    nMove = 0

            if rew == 1.0:
                line1 += 1
            elif rew == 8.0:
                line2 += 1
            elif rew == 27.0:
                line3 += 1
            elif rew == 64:
                line4 += 1
            '''
            if nAction < 9:
                obs = pre_processing(env.map, env._get_curr_block_pos())
                action = agent.action_test(obs)
                rew, shadow_reward, is_new_block = env.step(action)     # what is the 'is_new_block'?
                nAction += 1

            else:
                rew, is_new_block = env.step(100000)  # falling
                nAction = 0
            '''

            accumulated_reward = rew + rew_move + rew_height

            if env.is_game_end():
                episode += 1
                print(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                logging.info(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                break

            if env.score > 1000:
                episode += 1
                print(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                with torch.cuda.device(gpu_id):
                    torch.save(agent.model.state_dict(),
                               './weight/model' + str(n_update) + '.ckpt')
                logging.info(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                break
Example #6
def run_expert(expert_policy_file, config):
    policy_net = load_policy.load_policy(expert_policy_file)

    return run_agent(policy_net, config)
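A hypothetical usage line; judging from Example #2, run_agent returns a 3-tuple whose second element is the observations, so a run_expert call unpacks the same way (the path and config are placeholders):

_, observations, _ = run_expert('path/to/expert_policy.pkl', config)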
Example #7
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    #time.sleep(rank*30)

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)  #reward_type='indicator'

    start_time = time.time()

    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # for loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        eval = []
        if evaluation is True:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            if evaluation is True:  # evaluation mode
                with lock:
                    #if best_acc.value >= best_rate:
                    #    best_rate = best_acc.value
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(
                                Agent.model.state_dict(), params.weight_dir +
                                'model' + str(n_update) + '.ckpt')
                        save_model_index += 1
                    #if best_rate > best_acc.value:
                    #    best_acc.value = best_rate

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(
                    best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
Example #8
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length,
             unseen_succ, unseen_length):

    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()

    Agent = run_agent(model, gpu_id)

    n_test = 0
    start_time = time.time()

    while True:
        house_id = rank + (n_test * params.n_process)

        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                house = get_eval_house_id(house_id -
                                          (n_test * params.n_process))

        env = Environment(api, house, cfg)
        task = RoomNavTask(env,
                           hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps,
                           discrete_action=True)  #reward_type='indicator'

        eval = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)
            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in eval])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in eval])
            n_test += 1
Example #9
def run_loop(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Process: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]

    env = Env(False, 1, down_period=2)

    # model = A3C()
    model = A3C_LSTM()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    agent = run_agent(model, gpu_id)

    episode = 0
    while episode <= params.episode:
        env.reset()
        agent.done = False
        num_steps = 0

        agent.synchronize(shared_model)
        nAction = 0

        nMove = 0

        while True:
            num_steps += 1
            # random_action = random.randrange(0, 5)
            '''
            if nAction < 9:
                obs = pre_processing(env.map, env._get_curr_block_pos())
                action, value, log_prob, entropy = agent.action_train(obs)
                rew, is_new_block = env.step(action)     # what is the 'is_new_block'?
                nAction += 1
                if nAction != 9:
                    rew = np.clip(rew, 0.0, 64.0)
                    agent.put_reward(rew, value, log_prob, entropy)
            else:
                rew, is_new_block = env.step(100000)  # falling
                rew = np.clip(rew, 0.0, 64.0)
                agent.put_reward(rew, value, log_prob, entropy)
                nAction = 0
            '''
            obs = pre_processing(env.shadow_map,
                                 env._get_curr_block_pos())  # env.map
            action, value, log_prob, entropy = agent.action_train(obs)
            if action == 5:
                action = 100000
            rew, shadow_rew, done, putting, height = env.step(
                action)  # what is the 'is_new_block'?
            rew = np.clip(rew, -1.0, 64.0)
            if rew == 0.0 and action != 3 and action != 4:
                nMove += 1
                if nMove < 6:
                    rew = 0.2
                if putting:
                    rew = - (height / 20.0)
                    nMove = 0
            agent.put_reward(rew, value, log_prob, entropy)

            # pdb.set_trace()
            if env.is_game_end():
                episode += 1
                agent.done = True

            # if num_steps % params.num_steps == 0:
            # if env.is_game_end() or rew >= 1.0:
            if env.is_game_end():
                next_obs = pre_processing(env.map, env._get_curr_block_pos())
                agent.training(next_obs, shared_model, shared_optimizer, params)
                with lock:  # synchronize value across all processes
                    count.value += 1

            if env.is_game_end():
                break