Example #1
0
    def __init__(self, args):
        action_space = args.action_space
        hidden_state_sz = args.hidden_state_sz
        super(MJOLNIR_O, self).__init__()

        # get and normalize adjacency matrix.
        np.seterr(divide='ignore')
        A_raw = torch.load("./data/gcn/adjmat.dat")
        A = normalize_adj(A_raw).tocsr().toarray()
        self.A = torch.nn.Parameter(torch.Tensor(A))

        n = int(A.shape[0])
        self.n = n

        self.embed_action = nn.Linear(action_space, 10)

        lstm_input_sz = 10 + n * 5 + 512

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)

        # glove embeddings for all the objs.
        with open("./data/gcn/objects.txt") as f:
            objects = f.readlines()
            self.objects = [o.strip() for o in objects]
        all_glove = torch.zeros(n, 300)
        glove = Glove(args.glove_file)
        for i in range(n):
            all_glove[i, :] = torch.Tensor(
                glove.glove_embeddings[self.objects[i]][:])

        self.all_glove = nn.Parameter(all_glove)
        self.all_glove.requires_grad = False

        self.W0 = nn.Linear(401, 401, bias=False)
        self.W1 = nn.Linear(401, 401, bias=False)
        self.W2 = nn.Linear(401, 5, bias=False)
        self.W3 = nn.Linear(10, 1, bias=False)

        self.final_mapping = nn.Linear(n, 512)
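
The constructor above only registers the normalized adjacency matrix self.A, the frozen GloVe table self.all_glove, and the graph weights W0-W3; the forward pass is not part of this excerpt. Below is a minimal, hypothetical sketch (not the repository's actual forward method) of the kind of graph-convolution step these parameters suggest, assuming n nodes with 401-dimensional features:

import torch
import torch.nn as nn
import torch.nn.functional as F

def gcn_step(A, x, W0, W1, W2):
    # Aggregate neighbour features with the normalized adjacency, then transform.
    x = F.relu(W0(torch.mm(A, x)))   # (n, 401)
    x = F.relu(W1(torch.mm(A, x)))   # (n, 401)
    return W2(torch.mm(A, x))        # (n, 5): a small per-node context vector

if __name__ == "__main__":
    n = 83                                       # hypothetical number of nodes
    A = torch.eye(n)                             # stand-in for the normalized adjacency
    x = torch.randn(n, 401)                      # stand-in 401-d node features
    W0 = nn.Linear(401, 401, bias=False)
    W1 = nn.Linear(401, 401, bias=False)
    W2 = nn.Linear(401, 5, bias=False)
    print(gcn_step(A, x, W0, W1, W2).shape)      # torch.Size([83, 5])
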
Example #2
0
    def __init__(self, args):
        action_space = args.action_space
        target_embedding_sz = args.glove_dim
        resnet_embedding_sz = 512
        hidden_state_sz = args.hidden_state_sz
        super(GCN_MLP, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.embed_glove = nn.Linear(target_embedding_sz, 64)
        self.embed_action = nn.Linear(action_space, 10)

        pointwise_in_channels = 138  # 64 (conv1) + 64 (glove embedding) + 10 (action embedding)

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

        lstm_input_sz = 7 * 7 * 64 + 512
        mlp_input_sz = lstm_input_sz

        self.hidden_state_sz = hidden_state_sz
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)

        n = 83  # number of object nodes in the graph
        self.n = n

        # get and normalize adjacency matrix.
        A_raw = torch.load("./data/gcn/adjmat.dat")
        A = normalize_adj(A_raw).tocsr().toarray()
        self.A = torch.nn.Parameter(torch.Tensor(A))

        # final layers (avgpool + fc) of resnet18.
        resnet18 = models.resnet18(pretrained=True)
        modules = list(resnet18.children())[-2:]
        self.resnet18 = nn.Sequential(*modules)
        for p in self.resnet18.parameters():
            p.requires_grad = False

        # glove embeddings for all the objs.
        with open("./data/gcn/objects.txt") as f:
            objects = [o.strip() for o in f.readlines()]
        all_glove = torch.zeros(n, 300)
        glove = Glove(args.glove_file)
        for i in range(n):
            all_glove[i, :] = torch.Tensor(
                glove.glove_embeddings[objects[i]][:])

        self.all_glove = nn.Parameter(all_glove)
        self.all_glove.requires_grad = False

        self.get_word_embed = nn.Linear(300, 512)
        self.get_class_embed = nn.Linear(1000, 512)

        self.W0 = nn.Linear(1024, 1024, bias=False)
        self.W1 = nn.Linear(1024, 1024, bias=False)
        self.W2 = nn.Linear(1024, 1, bias=False)

        self.final_mapping = nn.Linear(n, 512)
        hidden_o = mlp_input_sz // 2
        self.W0m = nn.Linear(mlp_input_sz, 512, bias=False)
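
GCN_MLP's graph weights W0/W1/W2 operate on 1024-dimensional node features, and the constructor provides two 512-dimensional embeddings: get_word_embed (300 -> 512, applied to the GloVe table) and get_class_embed (1000 -> 512, applied to a classification-score vector). A hedged sketch of one plausible way such features could be assembled (a hypothetical helper, not the repository's code):

import torch

def build_node_features(all_glove, frame_class_scores, get_word_embed, get_class_embed):
    # all_glove: (n, 300) GloVe vectors; frame_class_scores: (1, 1000) class
    # scores for the current frame. Each node gets its own word embedding plus
    # a copy of the frame-level class embedding, giving (n, 1024) for W0/W1/W2.
    n = all_glove.shape[0]
    word = get_word_embed(all_glove)                        # (n, 512)
    cls = get_class_embed(frame_class_scores).repeat(n, 1)  # (n, 512)
    return torch.cat((word, cls), dim=1)                    # (n, 1024)
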
Example #3
0
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    random.seed(args.seed + rank)
    idx = [j for j in range(len(args.scene_types))]
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0

    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(),
                                               100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
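
nonadaptivea3c_train is an A3C worker: each copy receives a rank, the shared model, a shared optimizer, a result queue, and an end flag. A minimal launch sketch using torch.multiprocessing (the helpers create_shared_model, initialize_agent, the optimizer, and an args namespace with a workers field are assumed to come from the surrounding project):

import torch.multiprocessing as mp

def launch_training_workers(args, create_shared_model, shared_model,
                            initialize_agent, optimizer):
    # Spawn args.workers copies of the training loop above; each worker stops
    # once end_flag.value is set to True by the coordinating process.
    mp.set_start_method("spawn", force=True)
    res_queue = mp.Queue()
    end_flag = mp.Value("b", False)
    processes = []
    for rank in range(args.workers):
        p = mp.Process(
            target=nonadaptivea3c_train,
            args=(rank, args, create_shared_model, shared_model,
                  initialize_agent, optimizer, res_queue, end_flag),
        )
        p.start()
        processes.append(p)
    return processes, res_queue, end_flag
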
Example #4
0
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):

    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state['model'])

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    j = 0

    while count < max_count:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Run the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()
        spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
    res_queue.put({"END": True})
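
The validation worker reports SPL through compute_spl and get_bucketed_metrics, whose implementations are not part of this excerpt. For reference, a sketch of the standard per-episode SPL definition (Anderson et al., 2018), which compute_spl presumably follows:

def spl_single_episode(success, best_path_length, taken_path_length):
    # Success-weighted by Path Length for one episode:
    # SPL = S * L / max(L, P), with S in {0, 1}, L the shortest-path length and
    # P the path length actually taken. Reference sketch, not the project's code.
    if taken_path_length <= 0:
        return 0.0
    return float(success) * best_path_length / max(best_path_length, taken_path_length)
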
Example #5
0
def savn_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):

    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    #setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open is not None:
        saved_state = torch.load(
            model_to_open  #, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    while count < max_count:

        count += 1

        start_time = time.time()
        new_episode(args,
                    player,
                    scenes,
                    possible_targets,
                    targets,
                    glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        while True:
            total_reward = run_episode(player, args, total_reward,
                                       model_options, False)

            if player.done:
                break
            # Test-time adaptation is disabled here; the original condition is
            # kept for reference:
            # if args.gradient_limit < 0 or episode_num < args.gradient_limit:
            if False:

                num_gradients += 1

                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id,
                                                    model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient,
                             args.inner_lr))
                model_options.params = params_list[-1]

                # reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)

        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length,
                                            player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - start_time,
            spl=spl,
            **reward_dict,
            **bucketed_spl,
        )

        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
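
The (disabled) inner loop above would adapt a copy of the parameters with SGD_step, using gradients of the learned loss taken over params_list[episode_num]. A hedged sketch of what such a step typically looks like; the project's SGD_step may differ in detail:

def sgd_step_sketch(params, gradients, lr):
    # params: dict of name -> tensor; gradients: tuple returned by
    # torch.autograd.grad over params.items() in the same order.
    updated = {}
    for (name, param), grad in zip(params.items(), gradients):
        # allow_unused=True may yield None gradients; leave those parameters unchanged.
        updated[name] = param if grad is None else param - lr * grad
    return updated
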
Example #6
0
def savn_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):

    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    random.seed(args.seed + rank)
    idx = [j for j in range(len(args.scene_types))]
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)

    model_options = ModelOptions()

    j = 0

    while not end_flag.value:

        start_time = time.time()
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Accumulate loss over all meta_train episodes.
        while True:
            # Run episode for k steps or until it is done or has made a mistake (if dynamic adapt is true).
            if args.verbose:
                print("New inner step")
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)

            if player.done:
                break

            if args.gradient_limit < 0 or episode_num < args.gradient_limit:

                num_gradients += 1

                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id,
                                                    model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient,
                             args.inner_lr))
                model_options.params = params_list[-1]

                # reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)

        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        if args.verbose:
            print("meta gradient")

        # Compute the meta_gradient, i.e. differentiate w.r.t. theta.
        meta_gradient = torch.autograd.grad(
            loss["total_loss"],
            [v for _, v in params_list[0].items()],
            allow_unused=True,
        )

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            episode_num=0,
            total_time=time.time() - start_time,
            total_reward=total_reward,
        )

        # Copy the meta_gradient to shared_model and step.
        transfer_gradient_to_shared(meta_gradient, shared_model, gpu_id)
        optimizer.step()
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
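
After each episode the meta-gradient is taken w.r.t. the initial parameters params_list[0] and handed to transfer_gradient_to_shared before optimizer.step(). A hedged sketch of what that transfer could look like; the actual implementation is not shown here and may handle devices differently:

import torch

def transfer_gradient_sketch(meta_gradient, shared_model):
    # Assumes meta_gradient is ordered like shared_model.parameters(); copy each
    # gradient onto the shared parameter's .grad so the shared optimizer can step.
    for grad, param in zip(meta_gradient, shared_model.parameters()):
        if grad is None:  # allow_unused=True can produce None entries
            continue
        param.grad = grad.detach().to(param.device)
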
Example #7
0
    def __init__(self, args):
        action_space = args.action_space
        target_embedding_sz = args.glove_dim
        resnet_embedding_sz = 512
        hidden_state_sz = args.hidden_state_sz
        super(MJOLNIR_R, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.embed_glove = nn.Linear(target_embedding_sz, 64)
        self.embed_action = nn.Linear(action_space, 10)

        pointwise_in_channels = 64 + 64 + 10

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

        lstm_input_sz = 7 * 7 * 64 + 512

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)

        # get and normalize adjacency matrix.
        np.seterr(divide='ignore')
        A_raw = torch.load("./data/gcn/adjmat.dat")
        A = normalize_adj(A_raw).tocsr().toarray()
        self.A = torch.nn.Parameter(torch.Tensor(A))

        n = int(A.shape[0])
        self.n = n

        # final layers (avgpool + fc) of resnet18.
        resnet18 = models.resnet18(pretrained=True)
        modules = list(resnet18.children())[-2:]
        self.resnet18 = nn.Sequential(*modules)
        for p in self.resnet18.parameters():
            p.requires_grad = False

        # glove embeddings for all the objs.
        with open("./data/gcn/objects.txt") as f:
            objects = f.readlines()
            self.objects = [o.strip() for o in objects]
        all_glove = torch.zeros(n, 300)
        glove = Glove(args.glove_file)
        for i in range(n):
            all_glove[i, :] = torch.Tensor(
                glove.glove_embeddings[self.objects[i]][:])

        self.all_glove = nn.Parameter(all_glove)
        self.all_glove.requires_grad = False

        self.W0 = nn.Linear(401, 401, bias=False)
        self.W1 = nn.Linear(401, 401, bias=False)
        self.W2 = nn.Linear(401, 5, bias=False)
        self.W3 = nn.Linear(10, 1, bias=False)

        self.final_mapping = nn.Linear(n, 512)
Example #8
0
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
    global_ep,
):

    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = args.scene_types

    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":
        from datasets.ithor_data import get_data
        scenes, possible_targets, targets = get_data(scene_types, args.train_scenes)

    elif args.data_source == "robothor":

        from datasets.robothor_data import get_data

        # Check whether to use pinned_scene mode.
        if args.pinned_scene:
            # TODO: design a flexible scene allocating strategy
            scene_types = [scene_types[(rank % len(scene_types))]]
            pre_metadata = preload_metadata(args, scene_types)

        scenes, possible_targets, targets = get_data(scene_types)

        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)


    # If pinned_scene is True, all metadata for the controllers constructed in
    # new_episode() has already been pre-loaded above.


    random.seed(args.seed + rank)
    idx = list(range(len(scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0

    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        # new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]],glove=glove, protos=protos,
        #     pre_metadata=pre_metadata, curriculum_meta=curriculum_meta, total_ep=global_ep.value)
        scene = new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]],glove=glove, protos=protos,
            pre_metadata=pre_metadata, curriculum_meta=curriculum_meta)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)

            # Plot the trajectory (by wuxiaodong).
            if args.demo_trajectory and global_ep.value % args.demo_trajectory_freq == 0:
                print(len(player.episode.episode_trajectories))
                # todo delete
                # scene = 'FloorPlan_Train1_1'
                trajectory_pil = get_trajectory(scene,
                                                [str(loc) for loc in player.episode.episode_trajectories],
                                                birdview_root='./demo_robothor/data/birdview/',
                                                init_loc_str=player.episode.init_pos_str,
                                                target_loc_str=player.episode.target_pos_str,
                                                actions=player.episode.actions_taken,
                                                success=player.success, target_name=player.episode.target_object)
                demo_out_dir = os.path.join(args.log_dir, '../output_trajecgtory', args.title)
                if not os.path.exists(demo_out_dir):
                    os.makedirs(demo_out_dir)
                trajectory_pil.save(os.path.join(demo_out_dir, '{}_init_{}_target_{}_iter{}.png'.format(
                    player.episode.object_type,
                    player.episode.init_pos_str,
                    player.episode.target_pos_str,
                    global_ep.value
                )))
                print('plotting {}_init_{}_target_{}_iter{}.png'.format(
                    player.episode.object_type,
                    player.episode.init_pos_str,
                    player.episode.target_pos_str,
                    global_ep.value
                ))

            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        # print("Training Agent {}: finished episodes on {}, local loss {}".format(
        #     rank, scene, loss.cpu().detach().numpy() ))

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            policy_loss=loss['policy_loss'],
            value_loss=loss['value_loss']
        )

        reset_player(player)

        j = (j + 1) % len(scene_types)

    player.exit()
Example #9
0
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):

    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = [scene_type]
    offline_shortest_data = None

    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":

        from datasets.ithor_data import get_data, name_to_num

        scenes, possible_targets, targets = get_data(scene_types,
                                                     args.val_scenes)
        num = name_to_num(scene_type)
        scenes = scenes[0]
        targets = targets[0]

    elif args.data_source == "robothor":

        from datasets.robothor_data import get_data
        # TODO: design a flexible scene allocating strategy

        pre_metadata = preload_metadata(args, scene_types)

        scenes, possible_targets, targets = get_data(scene_types)
        scenes = scenes[0]
        targets = targets[0]

        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)
            if args.offline_shortest_data:
                offline_shortest_data = load_offline_shortest_path_data(
                    args, scenes)

    setproctitle.setproctitle("Val Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        saved_state = torch.load(model_to_open,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    while count < max_count:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        scene = new_episode(args,
                            player,
                            scenes,
                            possible_targets,
                            targets,
                            glove=glove,
                            protos=protos,
                            pre_metadata=pre_metadata,
                            curriculum_meta=curriculum_meta)
        if scene is None:  # iteration stopped
            break

        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Run the new episode.
        while not player.done:

            # Make sure model is up to date.
            # player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, False)
            # Compute the loss.
            # loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        # for k in loss:
        #     loss[k] = loss[k].item()
        if offline_shortest_data:  # assume data_source == robothor and curriculum_learning is True
            scene = player.environment.scene_name
            episode_id = player.episode.episode_id
            best_path_length = offline_shortest_data[scene][episode_id]
            spl = player.success * (best_path_length / float(player.eps_len))
        else:
            spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length,
                                            player.success)
        if args.curriculum_learning:
            end_episode(player,
                        res_queue,
                        total_time=time.time() - player_start_time,
                        total_reward=total_reward,
                        spl=spl,
                        **bucketed_spl,
                        scene_type=scene_type,
                        difficulty=player.episode.difficulty)
        else:
            end_episode(
                player,
                res_queue,
                total_time=time.time() - player_start_time,
                total_reward=total_reward,
                spl=spl,
                **bucketed_spl,
                scene_type=scene_type,
            )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({
        "END": True,
        "scene_type": scene_type,
        "total_episodes": count
    })
Example #10
0
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)
    random.seed(args.seed + rank)
    idx = [j for j in range(len(args.scene_types))]
    random.shuffle(idx)
    setproctitle.setproctitle("Training Agent: {}".format(rank))
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)
    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)
    model_options = ModelOptions()

    j = 0
    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(),
                                               100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)
    player.exit()