Example #1
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
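    # A3C training worker: builds its own renderer and GPU copy of the model,
    # then loops forever calling training(); every 1000 iterations it re-syncs
    # Agent.model from shared_model and runs testing() to track the best rate.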
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)
    house_id = params.house_id

    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0

    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params,
                 lock, count)

        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())

            start_time = time.time()
            best_rate, save_model_index = testing(lock, n_update, gpu_id,
                                                  Agent, task, best_rate,
                                                  params, save_model_index,
                                                  start_time, logging,
                                                  house_id)
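
The worker above is meant to be spawned as one of several processes that share a model, an update counter, and a lock. A minimal launch sketch, assuming params, A3C_LSTM_GA, and run_sim come from the surrounding project (the real entry point may differ):

import torch.multiprocessing as mp

# params, A3C_LSTM_GA, and run_sim are assumed to be provided by the project.
if __name__ == '__main__':
    mp.set_start_method('spawn')     # often required when the workers use CUDA
    shared_model = A3C_LSTM_GA()
    shared_model.share_memory()      # kept on CPU; workers copy its state dict
    shared_optimizer = None          # or a shared optimizer, depending on training()
    count = mp.Value('i', 0)         # global update counter
    lock = mp.Lock()

    processes = []
    for rank in range(params.n_process):
        p = mp.Process(target=run_sim,
                       args=(rank, params, shared_model, shared_optimizer,
                             count, lock))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
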
Example #2
def predict_house(student, weights):
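    # Logistic-regression inference: each weights row holds a house label, two
    # feature keys, a 3-element theta, and the per-feature mean/std. Scores are
    # averaged per house and the best-scoring house name is returned.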
    results = [[], [], [], []]
    house_rev = utils.get_house()
    for row in weights:
        if not np.isnan(student[row[1]]) and not np.isnan(student[row[2]]):
            x = np.array([student[row[1]], student[row[2]]])
            theta = np.array([row[3], row[4], row[5]])
            mean = np.array([row[6], row[7]])
            std = np.array([row[8], row[9]])
            x = (x - mean) / std
            x = np.insert(x, 0, 1, axis=0)
            results[utils.get_house_id(row[0])].append(
                utils.sigmoid(np.dot(x, theta)))
    for i in range(4):
        if len(results[i]) != 0:
            # results[i] = max(results[i])
            results[i] = sum(results[i]) / len(results[i])
        else:
            results[i] = 0
    return house_rev[results.index(max(results))]
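
The weights layout used above is inferred from the indexing alone: each row carries a house label, two feature keys, a three-element theta, and the per-feature mean and std used for standardization. A hypothetical call, with every name and number below purely illustrative:

# Illustrative data only; real rows come from training and real feature keys
# from the dataset.
student = {'Astronomy': 520.0, 'Herbology': 4.2}
weights = [
    # [house, feature_1, feature_2, theta_0, theta_1, theta_2,
    #  mean_1, mean_2, std_1, std_2]
    ['Gryffindor', 'Astronomy', 'Herbology',
     0.1, -1.3, 2.4, 480.0, 3.9, 95.0, 1.1],
]
print(predict_house(student, weights))   # house with the highest averaged score
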
Example #3
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
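    # Episode-based A3C training worker: builds a local Adam when no shared
    # optimizer is given, re-syncs from shared_model at the start of every
    # episode, and calls Agent.training() every params.num_steps steps or on done.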
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(),
                               lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        #optimizer.share_memory()
    else:
        optimizer = shared_optimizer

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    for episode in range(params.max_episode):
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target

        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0

        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(
                observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])

            rew = np.clip(reward, -1.0, 1.0)

            Agent.put_reward(rew, entropy, value, log_prob)
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer,
                               params)

            if done:
                break
Example #4
def run_sim(rank, params, state_Queue, action_done, actions, reward_Queue,
            lock):
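    # Environment-only worker: publishes (rank, [observation, target]) on
    # state_Queue, waits for the centrally chosen action (99 is a sentinel that
    # ends the episode for testing), and reports a clipped, sparse reward.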
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    house_id = params.house_id

    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    while True:
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        # with torch.cuda.device(gpu_id):
        #     target = Variable(torch.LongTensor(target)).cuda()

        total_reward, num_steps, good = 0, 0, 0
        done = False
        test = False

        while not done:
            num_steps += 1
            observation = next_observation
            state = rank, [observation, target]
            state_Queue.put(state)

            state_Queue.join()

            # action_done.get()   # action done
            action = actions[rank]
            if action == 99:
                test = True
                break  # call for test

            next_observation, reward, done, info = task.step(action)

            reward = np.clip(reward, -1.0, 10.0)
            if reward != -1.0 and reward != 10.0:  # make sparse reward
                reward = 0.0
            total_reward += reward

            rew = [rank, done, reward]
            # print("send - rank: {:d}, reward: {:3.2f}".format(rank, reward))
            reward_Queue.put(rew)

            reward_Queue.join()

            if done:
                break
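
This variant separates the environments from the learner: each worker blocks on state_Queue.join() until a central process has consumed its state and written an action into its slot of the shared actions array, then reports the reward the same way. A minimal coordinator sketch, assuming joinable queues and one action slot per rank (action_done is left unused here, and a real driver would also issue the sentinel action 99 to start testing):

def serve_actions(params, state_Queue, actions, reward_Queue):
    # Sketch only; a real driver would batch the states through the policy.
    while True:
        # Drain one pending state per worker and write back an action.
        for _ in range(params.n_process):
            rank, (observation, target) = state_Queue.get()
            actions[rank] = 0          # placeholder: a real driver queries the policy here
            state_Queue.task_done()    # releases that worker's state_Queue.join()
        # Drain the matching rewards.
        for _ in range(params.n_process):
            rank, done, reward = reward_Queue.get()
            reward_Queue.task_done()   # releases that worker's reward_Queue.join()

# Hypothetical wiring (names assumed):
#   state_Queue, reward_Queue = mp.JoinableQueue(), mp.JoinableQueue()
#   actions = mp.Array('i', params.n_process)
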
Example #5
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
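    # Evaluation worker: in evaluation mode it repeatedly copies the shared
    # model, runs n_eval episodes, and checkpoints whenever the success rate
    # improves; otherwise it evaluates a loaded state dict once over n_test episodes.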
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    #time.sleep(rank*30)

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)  #reward_type='indicator'

    start_time = time.time()

    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # for loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        eval = []
        if evaluation is True:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            if evaluation is True:  # evaluation mode
                with lock:
                    #if best_acc.value >= best_rate:
                    #    best_rate = best_acc.value
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(
                                Agent.model.state_dict(), params.weight_dir +
                                'model' + str(n_update) + '.ckpt')
                        save_model_index += 1
                    #if best_rate > best_acc.value:
                    #    best_acc.value = best_rate

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(
                    best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
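
A minimal sketch of how this evaluator might be started next to the training workers, assuming the shared_model, count, and lock objects from the training launch are reused and best_acc is a shared float (the real entry point may differ):

import torch.multiprocessing as mp

# params, shared_model, count, lock, and test are assumed to come from the
# training launch.
best_acc = mp.Value('d', 0.0)   # shared best success rate; only used by the commented-out lines above
p = mp.Process(target=test,
               args=(0, params, shared_model, count, lock, best_acc, True))
p.start()
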
Example #6
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length,
             unseen_succ, unseen_length):
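    # Offline test worker: loads a checkpoint and sweeps house ids rank,
    # rank + n_process, ... below 70 (ids under 20 are 'seen', the rest 'unseen'),
    # accumulating successes and episode lengths into the shared counters.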

    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()

    Agent = run_agent(model, gpu_id)

    n_test = 0
    start_time = time.time()

    while True:
        house_id = rank + (n_test * params.n_process)

        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                house = get_eval_house_id(house_id -
                                          (n_test * params.n_process))

        env = Environment(api, house, cfg)
        task = RoomNavTask(env,
                           hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps,
                           discrete_action=True)  #reward_type='indicator'

        eval = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)
            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in eval])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in eval])
            n_test += 1
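
A minimal driver sketch for the offline test above, assuming params, run_test, and a checkpoint path loaded_model come from the surrounding project; the 20/50 seen/unseen split follows the loop above:

import torch.multiprocessing as mp

# params, run_test, and loaded_model are assumed to be provided by the project.
if __name__ == '__main__':
    seen_succ, seen_length = mp.Value('i', 0), mp.Value('i', 0)
    unseen_succ, unseen_length = mp.Value('i', 0), mp.Value('i', 0)
    lock = mp.Lock()

    processes = []
    for rank in range(params.n_process):
        p = mp.Process(target=run_test,
                       args=(rank, params, loaded_model, lock, seen_succ,
                             seen_length, unseen_succ, unseen_length))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    # 20 seen and 50 unseen houses, params.n_test episodes each
    print('Seen success rate: {:.2f}%'.format(
        100.0 * seen_succ.value / (20 * params.n_test)))
    print('Unseen success rate: {:.2f}%'.format(
        100.0 * unseen_succ.value / (50 * params.n_test)))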