Example No. 1
        exit()

    shared_nav_model.share_memory()

    print('Loading navigation params from checkpoint: %s' %
          args.nav_checkpoint_path)
    shared_nav_model.load_state_dict(checkpoint['state'])

    # Load answering model
    print('Loading answering checkpoint from %s' % args.ans_checkpoint_path)
    ans_checkpoint = torch.load(args.ans_checkpoint_path,
                                map_location={'cuda:0': 'cpu'})

    ans_model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    shared_ans_model = VqaLstmCnnAttentionModel(**ans_model_kwargs)

    shared_ans_model.share_memory()

    print('Loading params from checkpoint: %s' % args.ans_checkpoint_path)
    shared_ans_model.load_state_dict(ans_checkpoint['state'])

    if args.mode == 'eval':

        eval(0, args, shared_nav_model, shared_ans_model)

    elif args.mode == 'train':

        train(0, args, shared_nav_model, shared_ans_model)

    else:
Example No. 2
def train(rank, args, shared_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmCnnAttentionModel(**model_kwargs)

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    shared_model.parameters()),
                             lr=args.learning_rate)
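    # NOTE: the optimizer is built over the *shared* model's parameters; each
    # worker computes gradients on its local copy and copies them back via
    # ensure_shared_grads() before optim.step() (Hogwild-style updates).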

    train_loader_kwargs = {
        'questions_h5': args.train_h5,
        'vocab': args.vocab_json,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': 'train',
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
    }

    args.output_log_path = os.path.join(args.log_dir,
                                        'train_' + str(rank) + '.json')

    metrics = VqaMetric(
        info={
            'split': 'train',
            'thread': rank
        },
        metric_names=['loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_log_path)
    print('[TRAIN_LOADER] start')
    train_loader = EqaDataLoader(**train_loader_kwargs)

    print('train_loader has %d samples' % len(train_loader.dataset))

    t, epoch = 0, 0

    while epoch < int(args.max_epochs):
        # print('epoch no. %d' % epoch)

        if args.input_type == 'ques':

            for batch in train_loader:

                t += 1

                model.load_state_dict(shared_model.state_dict())
                model.train()
                model.cuda()

                idx, questions, answers = batch

                questions_var = Variable(questions.cuda())
                answers_var = Variable(answers.cuda())

                scores = model(questions_var)
                loss = lossFn(scores, answers_var)

                # zero grad
                optim.zero_grad()

                # update metrics
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)
                metrics.update([loss.item(), accuracy, ranks, 1.0 / ranks])

                # backprop and update
                loss.backward()

                ensure_shared_grads(model.cpu(), shared_model)
                optim.step()

                if t % args.print_every == 0:
                    print(metrics.get_stat_string())
                    if args.log == True:
                        metrics.dump_log()

        elif args.input_type == 'ques,image':

            done = False
            all_envs_loaded = True
            #all_envs_loaded = train_loader.dataset._check_if_all_envs_loaded()

            while done == False:

                for batch in train_loader:

                    t += 1

                    model.load_state_dict(shared_model.state_dict())
                    model.train()
                    model.cnn.eval()
                    model.cuda()

                    idx, questions, answers, images, _, _, _ = batch

                    print('--- images dim {}'.format(images.size()))

                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    loss = lossFn(scores, answers_var)

                    print('--- att_probs: {}.'.format(att_probs))
                    # print('--- answers_var: {}.'.format(answers_var))
                    # print('--- loss: {}.'.format(loss))

                    # zero grad
                    optim.zero_grad()

                    # update metrics
                    accuracy, ranks = metrics.compute_ranks(
                        scores.data.cpu(), answers)
                    metrics.update([loss.item(), accuracy, ranks, 1.0 / ranks])

                    # backprop and update
                    loss.backward()

                    ensure_shared_grads(model.cpu(), shared_model)
                    optim.step()

                    if t % args.print_every == 0:
                        print(metrics.get_stat_string())
                        if args.log == True:
                            metrics.dump_log()

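                # When the split does not fit in memory at once, load the next shard
                # of environments and keep looping until every environment has been
                # visited. (all_envs_loaded is hard-coded to True above, so this
                # refresh path is effectively disabled in this snippet.)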
                if all_envs_loaded == False:
                    print('[CHECK][Cache:%d][Total:%d]' %
                          (len(train_loader.dataset.img_data_cache),
                           len(train_loader.dataset.env_list)))
                    train_loader.dataset._load_envs(in_order=True)
                    if len(train_loader.dataset.pruned_env_set) == 0:
                        done = True
                else:
                    done = True

        epoch += 1
Example No. 3
def eval(rank, args, shared_model):
    print('eval start...')

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmCnnAttentionModel(**model_kwargs)

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'vocab': args.vocab_json,
        'batch_size': 1,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
    }

    # print(eval_loader_kwargs)

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_log_path = os.path.join(args.log_dir,
                                        'eval_' + str(rank) + '.json')

    t, epoch, best_eval_acc = 0, 0, 0

    while epoch < int(args.max_epochs):

        model.load_state_dict(shared_model.state_dict())
        model.eval()

        metrics = VqaMetric(info={'split': args.eval_split},
                            metric_names=[
                                'loss', 'accuracy', 'mean_rank',
                                'mean_reciprocal_rank'
                            ],
                            log_json=args.output_log_path)

        if args.input_type == 'ques':
            for batch in eval_loader:
                t += 1

                model.cuda()

                idx, questions, answers = batch

                questions_var = Variable(questions.cuda())
                answers_var = Variable(answers.cuda())

                scores = model(questions_var)
                loss = lossFn(scores, answers_var)

                print(scores)

                # update metrics
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)
                metrics.update([loss.item(), accuracy, ranks, 1.0 / ranks])

            print(metrics.get_stat_string(mode=0))

        elif args.input_type == 'ques,image':
            done = False
            all_envs_loaded = True
            #all_envs_loaded = eval_loader.dataset._check_if_all_envs_loaded()

            while done == False:
                for batch in eval_loader:
                    t += 1

                    model.cuda()

                    idx, questions, answers, images, _, _, _ = batch

                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    loss = lossFn(scores, answers_var)

                    # update metrics
                    accuracy, ranks = metrics.compute_ranks(
                        scores.data.cpu(), answers)
                    metrics.update(
                        [loss.item(), accuracy, ranks, 1.0 / ranks])

                print(metrics.get_stat_string(mode=0))

                if all_envs_loaded == False:
                    eval_loader.dataset._load_envs()
                    if len(eval_loader.dataset.pruned_env_set) == 0:
                        done = True
                else:
                    done = True

        epoch += 1

        # checkpoint if best val accuracy
        if metrics.metrics[1][0] > best_eval_acc:
            best_eval_acc = metrics.metrics[1][0]
            if epoch % args.eval_every == 0 and args.log == True:
                metrics.dump_log()

                model_state = get_state(model)

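                # `checkpoint` is presumably the checkpoint dict loaded in the main
                # script (outside this snippet) when args.checkpoint_path is set.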
                if args.checkpoint_path != False:
                    ad = checkpoint['args']
                else:
                    ad = args.__dict__

                checkpoint = {'args': ad, 'state': model_state, 'epoch': epoch}

                checkpoint_path = '%s/epoch_%d_accuracy_%.04f.pt' % (
                    args.checkpoint_dir, epoch, best_eval_acc)
                print('Saving checkpoint to %s' % checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

        print('[best_eval_accuracy:%.04f]' % best_eval_acc)
Example No. 4
def test(rank):
    nav_model_kwargs = {'question_vocab': load_vocab(args.vocab_json)}
    nav_model = NavPlannerControllerModel(**nav_model_kwargs)
    nav_checkpoint = torch.load(args.nav_weight)  #load checkpoint weights
    nav_model.load_state_dict(nav_checkpoint['state'])  #create model
    print('--- nav_model loaded checkpoint ---')

    cnn_kwargs = {'num_classes': 191, 'pretrained': True}
    cnn = MultitaskCNN(**cnn_kwargs)
    cnn.eval()
    cnn.cuda()  #create cnn model

    vqa_model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    vqa_model = VqaLstmCnnAttentionModel(**vqa_model_kwargs)
    vqa_checkpoint = torch.load(args.vqa_weight)  #load checkpoint weights
    vqa_model.load_state_dict(vqa_checkpoint['state'])
    print('--- vqa_model loaded checkpoint ---')

    # need cnn?

    scene = "test-10-obj-100.txt"
    my_env = enviroment.Environment(is_testing=0, testing_file=scene)
    object_exist_list = my_env.ur5.object_type
    print("Objetcts that exist: ")
    print(object_exist_list)  #create simulation enviroment

    my_question = Qusetion(object_exist_list)  #create testing question
    testing_questions = my_question.createQueue()
    vocab = my_question.create_vocab()

    for question in testing_questions:
        planner_hidden = None
        max_action = 30
        position = [0, 0]
        action_in_raw = [0]  #start action_in
        actions = []

        print(question['question'])  #question
        questionTokens = my_question.tokenize(question['question'],
                                              punctToRemove=['?'],
                                              addStartToken=False)
        encoded_question_raw = my_question.encode(questionTokens,
                                                  vocab['questionTokenToIdx'])
        encoded_question_raw.append(0)  #encode question
        encoded_question_raw = np.array(encoded_question_raw)
        encoded_question_tensor = _dataset_to_tensor(encoded_question_raw)
        encoded_question = Variable(encoded_question_tensor)
        encoded_question = encoded_question.unsqueeze(0)
        print(encoded_question)
        action_times = 0
        push_signal = 0
        push_point = 0

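        # Greedy roll-out of the planner: feed the previous action and the current
        # camera frame, take the argmax action each step; 9 stops the episode,
        # 0 marks a push point, and any other action moves the target position.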
        while (action_times < max_action):

            #print(planner_img_feats_var.size())
            action_in_tensor = _dataset_to_tensor(action_in_raw)
            action_in = Variable(action_in_tensor)
            action_in = action_in.unsqueeze(0)
            action_in = action_in.unsqueeze(0)

            _, rgb_image_raw = my_env.camera.get_camera_data()  #before
            position_in, planner_img_feats_var = data2input(
                position, rgb_image_raw, cnn)

            output_data, planner_hidden = nav_model.planner_step(
                encoded_question, planner_img_feats_var, action_in,
                position_in, planner_hidden)
            planner_possi = F.log_softmax(output_data, dim=1)
            planner_data = planner_possi.data.numpy()
            planner_data = planner_data[0]
            action_out = np.where(planner_data == np.max(planner_data))
            action_out = action_out[0][0]

            actions.append(action_out)
            action_in_raw = [action_out]
            if action_out == 9:
                print('stop')
                break
            elif action_out == 0:
                push_signal = 1
                push_point = action_times
            else:
                dx, dy = order2action(action_out)
                position[0] += dx
                position[1] += dy
            action_times += 1

        if len(actions) > 2 and push_signal == 0:
            action_position = position + position
            my_env.UR5_action(action_position, 2)  #sucking
        elif len(actions) > 2 and push_signal == 1:  #pushing
            position_start = [0, 0]
            position_end = [0, 0]
            for i in range(len(actions)):
                if i <= push_point:  #the first step
                    dx, dy = order2action(actions[i])
                    position_start[0] += dx
                    position_start[1] += dy
                    position_end[0] += dx
                    position_end[1] += dy
                else:  #the second step
                    dx, dy = order2action(actions[i])
                    position_end[0] += dx
                    position_end[1] += dy
            action_position = position_start + position_end
            my_env.UR5_action(action_position, 1)  #pushing

        # get image after actions
        _, rgb_image_after = my_env.camera.get_camera_data()
        shrink = cv.resize(rgb_image_after, (224, 224),
                           interpolation=cv.INTER_AREA)
        shrink = np.array(shrink)
        shrink = shrink.transpose((2, 0, 1))
        shrink = shrink.reshape(1, 3, 224, 224)
        shrink = (shrink / 255.0).astype(np.float32)
        images = torch.FloatTensor(shrink)
        images = Variable(images)
        images = images.unsqueeze(0)
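        # images now has shape (1, 1, 3, 224, 224): one single-frame sequence in
        # [0, 1], the (batch, frames, C, H, W) layout the VQA model is fed here.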

        # process images

        # answer question in vqa now
        # encoded_question already done

        scores, _ = vqa_model(images, encoded_question)
        scores = scores.data.numpy()
        scores = scores[0]
        answer_predict = np.where(scores == np.max(scores))
        answer_predict = answer_predict[0][0]
        if answer_predict == 0:
            print('--- Predict: Does not exist')
        elif answer_predict == 1:
            print('--- Predict: Exists')
        else:
            raise Exception('Prediction neither 0 nor 1')
Example No. 5
    print(args.__dict__)

    if not os.path.exists(args.checkpoint_dir) and args.log == True:
        os.makedirs(args.checkpoint_dir)
        os.makedirs(args.log_dir)

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        shared_model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        print('[CREATE] SHARED MODEL')
        shared_model = VqaLstmCnnAttentionModel(**model_kwargs)
        print('[FINISH] SHARED MODEL')

    if args.checkpoint_path != False:
        print('Loading params from checkpoint: %s' % args.checkpoint_path)
        shared_model.load_state_dict(checkpoint['state'])

    shared_model.share_memory()

    if args.mode == 'eval':

        eval(0, args, shared_model)

    else:

        processes = []
Example No. 6
def eval(rank, args, shared_nav_model, shared_ans_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.model_type == 'pacman':

        model_kwargs = {'question_vocab': load_vocab(args.vocab_json)}
        nav_model = NavPlannerControllerModel(**model_kwargs)

    else:

        exit()

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    ans_model = VqaLstmCnnAttentionModel(**model_kwargs)

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'target_obj_conn_map_dir': args.target_obj_conn_map_dir,
        'map_resolution': args.map_resolution,
        'batch_size': 1,
        'input_type': args.model_type,
        'num_frames': 5,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': False
    }

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_nav_log_path = os.path.join(args.log_dir,
                                            'nav_eval_' + str(rank) + '.json')
    args.output_ans_log_path = os.path.join(args.log_dir,
                                            'ans_eval_' + str(rank) + '.json')

    t, epoch, best_eval_acc = 0, 0, 0.0

    while epoch < int(args.max_epochs):

        start_time = time.time()
        invalids = []

        nav_model.load_state_dict(shared_nav_model.state_dict())
        nav_model.eval()

        ans_model.load_state_dict(shared_ans_model.state_dict())
        ans_model.eval()
        ans_model.cuda()

        # that's a lot of numbers
        nav_metrics = NavMetric(
            info={'split': args.eval_split,
                  'thread': rank},
            metric_names=[
                'd_0_10', 'd_0_30', 'd_0_50', 'd_T_10', 'd_T_30', 'd_T_50',
                'd_D_10', 'd_D_30', 'd_D_50', 'd_min_10', 'd_min_30',
                'd_min_50', 'r_T_10', 'r_T_30', 'r_T_50', 'r_e_10', 'r_e_30',
                'r_e_50', 'stop_10', 'stop_30', 'stop_50', 'ep_len_10',
                'ep_len_30', 'ep_len_50'
            ],
            log_json=args.output_nav_log_path)

        vqa_metrics = VqaMetric(
            info={'split': args.eval_split,
                  'thread': rank},
            metric_names=[
                'accuracy_10', 'accuracy_30', 'accuracy_50', 'mean_rank_10',
                'mean_rank_30', 'mean_rank_50', 'mean_reciprocal_rank_10',
                'mean_reciprocal_rank_30', 'mean_reciprocal_rank_50'
            ],
            log_json=args.output_ans_log_path)

        if 'pacman' in args.model_type:

            done = False

            while done == False:

                for batch in tqdm(eval_loader):

                    nav_model.load_state_dict(shared_nav_model.state_dict())
                    nav_model.eval()
                    nav_model.cuda()

                    idx, question, answer, actions, action_length = batch
                    metrics_slug = {}

                    h3d = eval_loader.dataset.episode_house

                    # evaluate at multiple initializations
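                    # i is the number of ground-truth actions short of the target at
                    # which the agent is spawned; it matches the _10/_30/_50 suffixes
                    # on the metric names above.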
                    for i in [10, 30, 50]:

                        t += 1

                        if i > action_length[0]:
                            invalids.append([idx[0], i])
                            continue

                        question_var = Variable(question.cuda())

                        controller_step = False
                        planner_hidden = nav_model.planner_nav_rnn.init_hidden(
                            1)

                        # forward through planner till spawn
                        (planner_actions_in, planner_img_feats, controller_step,
                         controller_action_in, controller_img_feat,
                         init_pos) = eval_loader.dataset.get_hierarchical_features_till_spawn(
                            actions[0, :action_length[0] + 1].numpy(), i)

                        planner_actions_in_var = Variable(
                            planner_actions_in.cuda())
                        planner_img_feats_var = Variable(
                            planner_img_feats.cuda())

                        for step in range(planner_actions_in.size(0)):

                            planner_scores, planner_hidden = nav_model.planner_step(
                                question_var, planner_img_feats_var[step].view(
                                    1, 1,
                                    3200), planner_actions_in_var[step].view(
                                        1, 1), planner_hidden)

                        if controller_step == True:

                            controller_img_feat_var = Variable(
                                controller_img_feat.cuda())
                            controller_action_in_var = Variable(
                                torch.LongTensor(1, 1).fill_(
                                    int(controller_action_in)).cuda())

                            controller_scores = nav_model.controller_step(
                                controller_img_feat_var.view(1, 1, 3200),
                                controller_action_in_var.view(1, 1),
                                planner_hidden[0])

                            prob = F.softmax(controller_scores, dim=1)
                            controller_action = int(
                                prob.max(1)[1].data.cpu().numpy()[0])

                            if controller_action == 1:
                                controller_step = True
                            else:
                                controller_step = False

                            action = int(controller_action_in)
                            action_in = torch.LongTensor(
                                1, 1).fill_(action + 1).cuda()

                        else:

                            prob = F.softmax(planner_scores, dim=1)
                            action = int(prob.max(1)[1].data.cpu().numpy()[0])

                            action_in = torch.LongTensor(
                                1, 1).fill_(action + 1).cuda()

                        h3d.env.reset(
                            x=init_pos[0], y=init_pos[2], yaw=init_pos[3])

                        init_dist_to_target = h3d.get_dist_to_target(
                            h3d.env.cam.pos)
                        if init_dist_to_target < 0:  # unreachable
                            invalids.append([idx[0], i])
                            continue

                        episode_length = 0
                        episode_done = True
                        controller_action_counter = 0

                        dists_to_target, pos_queue, pred_actions = [
                            init_dist_to_target
                        ], [init_pos], []
                        planner_actions, controller_actions = [], []

                        if action != 3:

                            # take the first step
                            img, _, _ = h3d.step(action)
                            img = torch.from_numpy(img.transpose(
                                2, 0, 1)).float() / 255.0
                            img_feat_var = eval_loader.dataset.cnn(
                                Variable(img.view(1, 3, 224,
                                                  224).cuda())).view(
                                                      1, 1, 3200)

                            for step in range(args.max_episode_length):

                                episode_length += 1

                                if controller_step == False:
                                    planner_scores, planner_hidden = nav_model.planner_step(
                                        question_var, img_feat_var,
                                        Variable(action_in), planner_hidden)

                                    prob = F.softmax(planner_scores, dim=1)
                                    action = int(
                                        prob.max(1)[1].data.cpu().numpy()[0])
                                    planner_actions.append(action)

                                pred_actions.append(action)
                                img, _, episode_done = h3d.step(action)

                                episode_done = episode_done or episode_length >= args.max_episode_length

                                img = torch.from_numpy(img.transpose(
                                    2, 0, 1)).float() / 255.0
                                img_feat_var = eval_loader.dataset.cnn(
                                    Variable(img.view(1, 3, 224, 224)
                                             .cuda())).view(1, 1, 3200)

                                dists_to_target.append(
                                    h3d.get_dist_to_target(h3d.env.cam.pos))
                                pos_queue.append([
                                    h3d.env.cam.pos.x, h3d.env.cam.pos.y,
                                    h3d.env.cam.pos.z, h3d.env.cam.yaw
                                ])

                                if episode_done == True:
                                    break

                                # query controller to continue or not
                                controller_action_in = Variable(
                                    torch.LongTensor(1,
                                                     1).fill_(action).cuda())
                                controller_scores = nav_model.controller_step(
                                    img_feat_var, controller_action_in,
                                    planner_hidden[0])

                                prob = F.softmax(controller_scores, dim=1)
                                controller_action = int(
                                    prob.max(1)[1].data.cpu().numpy()[0])

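                                # the controller may repeat the planner's last
                                # action at most 4 consecutive times before control
                                # returns to the planner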
                                if controller_action == 1 and controller_action_counter < 4:
                                    controller_action_counter += 1
                                    controller_step = True
                                else:
                                    controller_action_counter = 0
                                    controller_step = False
                                    controller_action = 0

                                controller_actions.append(controller_action)

                                action_in = torch.LongTensor(
                                    1, 1).fill_(action + 1).cuda()

                        # run answerer here
                        if len(pos_queue) < 5:
                            pos_queue = eval_loader.dataset.episode_pos_queue[len(
                                pos_queue) - 5:] + pos_queue
                        images = eval_loader.dataset.get_frames(
                            h3d, pos_queue[-5:], preprocess=True)
                        images_var = Variable(
                            torch.from_numpy(images).cuda()).view(
                                1, 5, 3, 224, 224)
                        scores, att_probs = ans_model(images_var, question_var)
                        ans_acc, ans_rank = vqa_metrics.compute_ranks(
                            scores.data.cpu(), answer)

                        pred_answer = scores.max(1)[1].data[0]

                        print('[Q_GT]', ' '.join([
                            eval_loader.dataset.vocab['questionIdxToToken'][x]
                            for x in question[0] if x != 0
                        ]))
                        print('[A_GT]', eval_loader.dataset.vocab[
                            'answerIdxToToken'][answer[0]])
                        print('[A_PRED]', eval_loader.dataset.vocab[
                            'answerIdxToToken'][pred_answer])

                        # compute stats
                        metrics_slug['accuracy_' + str(i)] = ans_acc[0]
                        metrics_slug['mean_rank_' + str(i)] = ans_rank[0]
                        metrics_slug['mean_reciprocal_rank_'
                                     + str(i)] = 1.0 / ans_rank[0]

                        metrics_slug['d_0_' + str(i)] = dists_to_target[0]
                        metrics_slug['d_T_' + str(i)] = dists_to_target[-1]
                        metrics_slug['d_D_' + str(
                            i)] = dists_to_target[0] - dists_to_target[-1]
                        metrics_slug['d_min_' + str(i)] = np.array(
                            dists_to_target).min()
                        metrics_slug['ep_len_' + str(i)] = episode_length
                        if action == 3:
                            metrics_slug['stop_' + str(i)] = 1
                        else:
                            metrics_slug['stop_' + str(i)] = 0
                        inside_room = []
                        for p in pos_queue:
                            inside_room.append(
                                h3d.is_inside_room(
                                    p, eval_loader.dataset.target_room))
                        if inside_room[-1] == True:
                            metrics_slug['r_T_' + str(i)] = 1
                        else:
                            metrics_slug['r_T_' + str(i)] = 0
                        if any([x == True for x in inside_room]) == True:
                            metrics_slug['r_e_' + str(i)] = 1
                        else:
                            metrics_slug['r_e_' + str(i)] = 0

                    # navigation metrics
                    metrics_list = []
                    for i in nav_metrics.metric_names:
                        if i not in metrics_slug:
                            metrics_list.append(nav_metrics.metrics[
                                nav_metrics.metric_names.index(i)][0])
                        else:
                            metrics_list.append(metrics_slug[i])

                    nav_metrics.update(metrics_list)

                    # vqa metrics
                    metrics_list = []
                    for i in vqa_metrics.metric_names:
                        if i not in metrics_slug:
                            metrics_list.append(vqa_metrics.metrics[
                                vqa_metrics.metric_names.index(i)][0])
                        else:
                            metrics_list.append(metrics_slug[i])

                    vqa_metrics.update(metrics_list)

                try:
                    print(nav_metrics.get_stat_string(mode=0))
                    print(vqa_metrics.get_stat_string(mode=0))
                except:
                    pass

                print('epoch', epoch)
                print('invalids', len(invalids))

                eval_loader.dataset._load_envs()
                if len(eval_loader.dataset.pruned_env_set) == 0:
                    done = True

        epoch += 1

        # checkpoint if best val accuracy
        if vqa_metrics.metrics[2][0] > best_eval_acc:  # ans_acc_50
            best_eval_acc = vqa_metrics.metrics[2][0]
            if epoch % args.eval_every == 0 and args.to_log == 1:
                vqa_metrics.dump_log()
                nav_metrics.dump_log()

                model_state = get_state(nav_model)

                aad = dict(args.__dict__)
                ad = {}
                for i in aad:
                    if i[0] != '_':
                        ad[i] = aad[i]

                checkpoint = {'args': ad, 'state': model_state, 'epoch': epoch}

                checkpoint_path = '%s/epoch_%d_ans_50_%.04f.pt' % (
                    args.checkpoint_dir, epoch, best_eval_acc)
                print('Saving checkpoint to %s' % checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

        print('[best_eval_ans_acc_50:%.04f]' % best_eval_acc)

        eval_loader.dataset._load_envs(start_idx=0, in_order=True)
Example No. 7
def fgsm(rank, args, shared_model, number):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))
    all_n = 0

    # torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    model = VqaLstmCnnAttentionModel(**model_kwargs)
    device_ids = [0,1]

    model = model.cuda(device_ids[0])
    model = torch.nn.DataParallel(model, device_ids=device_ids)
    # torch.backends.cudnn.benchmark = True
    
    lossFn = torch.nn.CrossEntropyLoss().cuda()

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'target_obj_conn_map_dir': args.target_obj_conn_map_dir,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank%len(args.gpus)],
        'to_cache': args.cache
    }

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    # for ijcai in range(number):
    #     eval_loader.dataset._load_envs()
    eval_loader.dataset._load_envs(start_idx=number)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_log_path = os.path.join(args.log_dir,
                                        'eval_' + str(rank) + '.json')

    model.load_state_dict(handle_load(shared_model.state_dict()))
    model.eval()

    metrics = VqaMetric(
        info={'split': args.eval_split},
        metric_names=[
            'loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'
        ],
        log_json=args.output_log_path)


    all_envs_loaded = eval_loader.dataset._check_if_all_envs_loaded()
    done = False
    print(number, ' begin')
    import copy
    import torch.nn as nn
    from PIL import Image
    softmax = nn.Softmax(dim=0)
    allcor = []
    while done == False:
        for ii, batch in enumerate(eval_loader):
            # model.cuda()
            if ii >= 0:
                idx, questions, answers, house, v, f, vt, pos, _, _, _ = batch
                print('all size:', v.size(), f.size(), vt.size())
                #print(house)
                print(questions, answers)
                questions_var = Variable(questions.cuda())
                print(questions_var.size())
                answers_var = Variable(answers.cuda())
                v_var = Variable(v.cuda(), requires_grad=True)
                f_var = Variable(f.cuda())
                vt_var = Variable(vt.cuda(), requires_grad=True)
                begin, end = get_info(idx[0], house[0])
                #print(vt_var[0][0][40010][0][0])  
                #print(vt_var[0][0][39506][0][0]) 
                vt_test = copy.deepcopy(vt_var)
                epsilon = 32.0/256.0
                vt_grad = torch.zeros(vt_var.size()).cuda()
                # vt_var.retain_grad()
                scores, att_probs, img_clean = model(v_var, f_var, vt_var, pos, questions_var)
                i1, i2 = torch.max(scores[0], 0)
                mi = i2.cpu().numpy().tolist()
                ms = int(answers)
                print(mi)
                print(mi == int(ms))
                if mi == int(ms):
                    allcor.append(1.0)
                else:
                    allcor.append(0.0)
                print(softmax(scores[0]))
                print(softmax(scores[0])[ms])
                img_clean = img_clean[0]
                for iii in range(img_clean.size()[0]):
                    imggg = img_clean[iii].detach().cpu().numpy()
                    imggg = imggg * 255.0
                    imggg = imggg.transpose((1, 2, 0))
                    imggg = Image.fromarray(imggg.astype('uint8'))
                    imggg.save('result_test/' + str(ii) + '_' + str(iii) + '_clean.jpg')
                loss = lossFn(scores, answers_var)
                loss.backward()
                #print(torch.max(vt_grad))
                vt_grad = vt_var.grad
                v_grad = v_var.grad
                print('max grad',torch.max(vt_grad.data))
                #print(vt_grad[0][0][40010][0][0])
                #print(vt_grad[0][0][39506][0][0])
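                # FGSM step on the texture tensor: move every texel by epsilon in
                # the direction of the sign of the loss gradient, then clip back to
                # the valid [0, 1] range.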
                vt_var = vt_var.detach() + epsilon * torch.sign(vt_grad)
                #v_var = v_var.detach() + 1.0 * torch.sign(v_grad)
                vt_var = torch.clamp(vt_var, 0, 1)
                #vt_var = Variable(vt_var.data, requires_grad=True).cuda()
                with torch.no_grad():
                    model.eval()
                    begin, end = get_info(idx[0], house[0]) 
                    #for iii in range(begin,end):
                        #if(vt_test[0][0][iii][0][0][0][0] != vt_var[0][0][iii][0][0][0][0] or vt_test[0][0][iii][0][0][1][0] != vt_var[0][0][iii][0][0][1][0] or vt_test[0][0][iii][0][1][1][0] != vt_var[0][0][iii][0][1][1][0]):
                            #print(iii)
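                    # splice the perturbed texels back into an otherwise clean copy,
                    # so only the [begin:end) span selected by get_info() is changed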
                    vt_test[0][0][begin:end] = vt_var[0][0][begin:end]
                    #print(vt_test[0][0][40010][0][0])
                    #print(vt_test[0][0][39506][0][0])
                    #print((vt_test[0][0] == vt_var[0][0]).sum())
                    #print((vt_test[0][0].size()),(vt_var[0][0].size()))
                    scores, att_probs,imgg = model(v_var, f_var, vt_test, pos, questions_var)
                    imgg = imgg[0]
                    #print(imgg.size())
                    for iii in range(imgg.size()[0]):
                        imggg = imgg[iii].detach().cpu().numpy()
                        imggg = imggg * 255.0
                        imggg = imggg.transpose((1, 2, 0))
                        imggg = Image.fromarray(imggg.astype('uint8'))
                        imggg.save('result_test/' + str(ii) + '_' + str(iii) + '_adv.jpg')
                    i1, i2 = torch.max(scores[0], 0)
                    mi = i2.cpu().numpy().tolist()
                    ms = int(answers)
                    print(mi)
                    print(mi == int(ms))
                    print(softmax(scores[0]))
                    print(softmax(scores[0])[ms])
                for k in range(idx.shape[0]):
                    begin, end = get_info(idx[k], house[k]) 
                    #print(begin, end)
                    #begin, end = find_index('167', house[k]) 
                    v_m = v_var[k][0]
                    f_m = f_var[k][0][begin:end]
                    vt_m = vt_var[k][0][begin:end]
                    nr.save_obj('/media/trs1/dataset/suncg_data/house/' + house[k] + '/attack_' + str(int(idx[k])) + '.obj', v_m, f_m, vt_m)
                    idx = idx.cpu()
                    questions = questions.cpu()
                    answers = answers.cpu()
                    questions_var = questions_var.cpu()
                    answers_var = answers_var.cpu()
                    v = v.cpu()
                    f = f.cpu()
                    vt = vt.cpu()
                    v_m = v_m.cpu()
                    f_m = f_m.cpu()
                    vt_m = vt_m.cpu()
                    v_var = v_var.detach().cpu()
                    f_var = f_var.cpu()
                    vt_var = vt_var.detach().cpu()
                    vt_grad = vt_grad.cpu()
                    print(house[k] + ' ' + str(int(idx[k])) + ' ok')
                    all_n += 1
                    # handle_file(path)

            if all_envs_loaded == False:
                eval_loader.dataset._load_envs()
                if len(eval_loader.dataset.pruned_env_set) == 0:
                    done = True
            else:
                done = True
        print(allcor)
        print(number, ' over')
Example No. 8
def test(rank):

    cnn_model_dir = os.path.abspath("../train/models/03_13_h3d_hybrid_cnn.pt")

    vqa_model_kwargs = {
        'vocab': load_vocab(args.vocab_json),
        'checkpoint_path': cnn_model_dir
    }
    vqa_model = VqaLstmCnnAttentionModel(**vqa_model_kwargs)
    vqa_checkpoint = torch.load(args.vqa_weight)  #load checkpoint weights
    vqa_model.load_state_dict(vqa_checkpoint['state'])
    print('--- vqa_model loaded checkpoint ---')

    res_model_dir = os.path.abspath("../train/models/resnet101.pth")
    my_map_cnn = mapCNN(checkpoint_path=res_model_dir)
    map_checkpoint = torch.load('mapcnn.pt',
                                map_location='cpu')  #load checkpoint weights
    my_map_cnn.load_state_dict(map_checkpoint['state'])  #create map model
    print('--- map_model loaded checkpoint ---')

    cnn_kwargs = {
        'num_classes': 191,
        'pretrained': True,
        'checkpoint_path': cnn_model_dir
    }
    cnn = MultitaskCNN(**cnn_kwargs)
    cnn.eval()

    vocab_dir = os.path.abspath("vocab.json")
    with open(vocab_dir, 'r', encoding='utf-8') as vocab_file:
        vocab = json.load(vocab_file)

    question = args.question
    print(question)
    questionTokens = tokenize(question,
                              punctToRemove=['?'],
                              addStartToken=False)

    encoded_question_raw = encode(questionTokens, vocab['questionTokenToIdx'])
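    # pad the encoded question with index 0 (presumably the <NULL> token) up to a
    # fixed length of 10 tokens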
    while (len(encoded_question_raw) < 10):
        encoded_question_raw.append(0)  #encode question
    encoded_question_raw = np.array(encoded_question_raw)
    encoded_question_tensor = _dataset_to_tensor(encoded_question_raw)
    encoded_question = Variable(encoded_question_tensor)
    encoded_question = encoded_question.unsqueeze(0)

    rgb_before = cv.imread(args.rgb_image_before_dir)
    rgb_after = cv.imread(args.rgb_image_after_dir)
    depth_after = cv.imread(args.depth_image_after_dir)
    depth_after = depth_after[0]
    depth_dim = depth_after.shape
    print(depth_dim)

    rgb_after_resize = cv.resize(rgb_after, (256, 256),
                                 interpolation=cv.INTER_AREA)
    # crop and add marking
    depth_after_resize = cv.resize(depth_after, (256, 256),
                                   interpolation=cv.INTER_AREA)
    # crop and add marking

    rgb_tensor, depth_tensor = rgbd2tensor(rgb_after_resize,
                                           depth_after_resize)  #output_heatmap
    heatmap_output = rgbd2heatmap(rgb_tensor, depth_tensor, my_map_cnn)
    with h5py.File(args.heatmap_output_dir, 'w') as f:
        f['heatmap'] = heatmap_output

    cv.imwrite(args.rgb_image_after_dir, rgb_after_resize)
    cv.imwrite(args.depth_image_after_dir, depth_after_resize)

    before_image_feat = data2input(rgb_before)
    after_image_feat = data2input(rgb_after_resize)

    input_image = [before_image_feat, after_image_feat]
    input_image_feats = Variable(torch.FloatTensor(input_image))
    input_image_feats = input_image_feats.view(1, 2, 3, 224, 224)

    # print(input_image_feats.size())

    #print(input_image.size())
    #print(before_image_feat.size())

    scores, _ = vqa_model(input_image_feats, encoded_question)
    scores = scores.data.numpy()
    scores = scores[0]
    answer_predict = np.where(scores == np.max(scores))
    answer_predict = answer_predict[0][0]
    answer_dic = vocab["answerTokenToIdx"]
    answer = [k for k, v in answer_dic.items() if v == answer_predict]

    print(answer[0])
Example No. 9
def train(rank, args, shared_model, use_vision, use_language):

    gpu_idx = args.gpus.index(args.gpus[rank % len(args.gpus)])
    torch.cuda.set_device(gpu_idx)
    print("train gpu:" + str(gpu_idx) + " assigned")

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmCnnAttentionModel(**model_kwargs)

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    shared_model.parameters()),
                             lr=args.learning_rate)

    train_loader_kwargs = {
        'questions_h5': args.train_h5,
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': 'train',
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.to_cache
    }

    args.output_log_path = os.path.join(args.log_dir,
                                        'train_' + str(rank) + '.json')

    metrics = VqaMetric(
        info={
            'split': 'train',
            'thread': rank
        },
        metric_names=['loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_log_path)
    train_loader = EqaDataLoader(**train_loader_kwargs)
    if args.input_type == 'ques,image':
        train_loader.dataset._load_envs(start_idx=0, in_order=True)

    print('train_loader has %d samples' % len(train_loader.dataset))

    t, epoch = 0, 0

    while epoch < int(args.max_epochs):
        print("train gpu:" + str(gpu_idx) + " running epoch " + str(epoch))

        if args.input_type == 'ques':

            for batch in train_loader:

                t += 1

                model.load_state_dict(shared_model.state_dict())
                model.train()
                model.cuda()

                idx, questions, answers = batch

                # If not using language, replace each question with a start and end token back to back.
                if not use_language:
                    questions = torch.zeros_like(questions)
                    questions.fill_(
                        model_kwargs['vocab']['questionTokenToIdx']['<NULL>'])
                    questions[:, 0] = model_kwargs['vocab'][
                        'questionTokenToIdx']['<START>']
                    questions[:, 1] = model_kwargs['vocab'][
                        'questionTokenToIdx']['<END>']

                questions_var = Variable(questions.cuda())
                answers_var = Variable(answers.cuda())

                scores = model(questions_var)
                loss = lossFn(scores, answers_var)

                # zero grad
                optim.zero_grad()

                # update metrics
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)
                metrics.update([loss.item(), accuracy, ranks, 1.0 / ranks])

                # backprop and update
                loss.backward()

                ensure_shared_grads(model.cpu(), shared_model)
                optim.step()

                if t % args.print_every == 0:
                    print(metrics.get_stat_string())
                    if args.to_log == 1:
                        metrics.dump_log()

        elif args.input_type == 'ques,image':

            done = False
            all_envs_loaded = train_loader.dataset._check_if_all_envs_loaded()

            while done == False:

                for batch in train_loader:

                    t += 1

                    model.load_state_dict(shared_model.state_dict())
                    model.train()
                    model.cuda()

                    idx, questions, answers, images, _, _, _ = batch

                    # If not using language, replace each question with a start and end token back to back.
                    if not use_language:
                        questions = torch.zeros_like(questions)
                        questions.fill_(model_kwargs['vocab']
                                        ['questionTokenToIdx']['<NULL>'])
                        questions[:, 0] = model_kwargs['vocab'][
                            'questionTokenToIdx']['<START>']
                        questions[:, 1] = model_kwargs['vocab'][
                            'questionTokenToIdx']['<END>']
                    # If not using vision, replace all image feature data with zeros.
                    if not use_vision:
                        images = torch.zeros_like(images)

                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    loss = lossFn(scores, answers_var)

                    # zero grad
                    optim.zero_grad()

                    # update metrics
                    accuracy, ranks = metrics.compute_ranks(
                        scores.data.cpu(), answers)
                    metrics.update(
                        [loss.item(), accuracy, ranks, 1.0 / ranks])

                    # backprop and update
                    loss.backward()

                    ensure_shared_grads(model.cpu(), shared_model)
                    optim.step()

                    if t % args.print_every == 0:
                        print(metrics.get_stat_string())
                        if args.to_log == 1:
                            metrics.dump_log()

                if all_envs_loaded == False:
                    train_loader.dataset._load_envs(in_order=True)
                    if len(train_loader.dataset.pruned_env_set) == 0:
                        done = True
                else:
                    done = True

        # Set shared epoch when it finishes on the training side
        with open(args.identifier + '.shared_epoch.tmp', 'w') as f:
            f.write(str(epoch))

        epoch += 1
Example No. 10
def attack_pgd(rank, args, shared_model, number):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))
    all_n = 0

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    model = VqaLstmCnnAttentionModel(**model_kwargs)
    device_ids = [0, 1]

    model = model.cuda(device_ids[0])
    model = torch.nn.DataParallel(model, device_ids=device_ids)
    # torch.backends.cudnn.benchmark = True

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.cache
    }

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    eval_loader.dataset._load_envs(start_idx=number)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_log_path = os.path.join(args.log_dir,
                                        'eval_' + str(rank) + '.json')

    model.load_state_dict(handle_load(shared_model.state_dict()))
    model.eval()

    metrics = VqaMetric(
        info={'split': args.eval_split},
        metric_names=['loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_log_path)

    all_envs_loaded = eval_loader.dataset._check_if_all_envs_loaded()
    done = False

    while done == False:
        for batch in eval_loader:
            # model.cuda()
            idx, questions, answers, house, v, f, vt, pos, _, _, _ = batch

            questions_var = Variable(questions.cuda())
            answers_var = Variable(answers.cuda())
            v_var = Variable(v.cuda(), requires_grad=True)
            f_var = Variable(f.cuda())
            vt_var = Variable(vt.cuda(), requires_grad=True)

            attack_iter = 10
            attack_momentum = 1
            alpha = 2.0 / 255
            epsilon = 16.0 / 255
            vt_grad = torch.zeros(vt_var.size()).cuda()
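            # Momentum iterative FGSM (PGD-style): accumulate the texture gradient,
            # take attack_iter signed steps of size alpha, and keep vt_var inside an
            # epsilon L-inf ball around the original texture vt (clamped to [0, 1]).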
            for j in range(attack_iter):
                scores, att_probs = model(v_var, f_var, vt_var, pos,
                                          questions_var, j, str(int(idx[0])))

                loss = lossFn(scores, answers_var)
                loss.backward()
                vg = vt_var.grad

                begin, end, _ = get_info(idx[0], house[0])
                if (begin == 1000 and end == 2000):
                    print(str(int(idx[0])), 'error')
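                # zero the gradient outside [begin, end) so only the selected
                # object's texels are perturbed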
                vg[0][0][:begin] = 0
                vg[0][0][end:] = 0

                noise = attack_momentum * vt_grad + vg
                vt_grad = noise
                vt_var = vt_var.detach() + alpha * torch.sign(noise)
                vt_var = torch.where(vt_var > vt + epsilon, vt + epsilon,
                                     vt_var)
                vt_var = torch.clamp(vt_var, 0, 1)
                vt_var = torch.where(vt_var < vt - epsilon, vt - epsilon,
                                     vt_var)
                vt_var = torch.clamp(vt_var, 0, 1)
                vt_var = Variable(vt_var.data, requires_grad=True).cuda()

            with torch.no_grad():
                model.eval()
                scores, att_probs = model(v_var, f_var, vt_var, pos,
                                          questions_var, 100, str(int(idx[0])))
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)

            begin, end, oid = get_info(idx[0], house[0])
            v_m = v_var[0][0]
            f_m = f_var[0][0][begin:end]
            vt_m = vt_var[0][0][begin:end]
            nr.save_obj(
                '/path/to/data/house/' + house[0] + '/attack_' +
                str(int(idx[0])) + '_' + str(oid) + '.obj', v_m, f_m, vt_m)
        if all_envs_loaded == False:
            eval_loader.dataset._load_envs()
            if len(eval_loader.dataset.pruned_env_set) == 0:
                done = True
        else:
            done = True
Example No. 11
def attack_fgsm(rank, args, shared_model, number):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))
    all_n = 0

    # torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    model = VqaLstmCnnAttentionModel(**model_kwargs)
    device_ids = [0, 1]

    model = model.cuda(device_ids[0])
    model = torch.nn.DataParallel(model, device_ids=device_ids)
    # torch.backends.cudnn.benchmark = True

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.cache
    }

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    # for ijcai in range(number):
    #     eval_loader.dataset._load_envs()
    eval_loader.dataset._load_envs(start_idx=number)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_log_path = os.path.join(args.log_dir,
                                        'eval_' + str(rank) + '.json')

    model.load_state_dict(handle_load(shared_model.state_dict()))
    model.eval()

    metrics = VqaMetric(
        info={'split': args.eval_split},
        metric_names=['loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_log_path)

    all_envs_loaded = eval_loader.dataset._check_if_all_envs_loaded()
    done = False
    while done == False:
        for batch in eval_loader:
            idx, questions, answers, house, v, f, vt, pos, _, _, _ = batch
            questions_var = Variable(questions.cuda())
            answers_var = Variable(answers.cuda())
            v_var = Variable(v.cuda(), requires_grad=True)
            f_var = Variable(f.cuda())
            vt_var = Variable(vt.cuda(), requires_grad=True)

            # noise level
            epsilon = 12.0 / 255.0
            scores, att_probs = model(v_var, f_var, vt_var, pos, questions_var,
                                      0, '0')
            loss = lossFn(scores, answers_var)
            loss.backward()

            # get grad for attack
            vt_grad = vt_var.grad
            vt_detach = vt_var.detach()

            begin, end, oid = get_info(idx[0], house[0])
            if (begin == 1000 and end == 2000):
                print(str(int(idx[0])), 'error')
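            # get_info presumably returns the texture-row range [begin, end) and the id of the
            # question's target object; the gradient is zeroed outside that range so only the
            # target object's texture is perturbed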
            vt_grad[0][0][:begin] = 0
            vt_grad[0][0][end:] = 0
            vt_var = vt_detach + epsilon * torch.sign(vt_grad)
            for k in range(idx.shape[0]):
                begin, end, oid = get_info(idx[k], house[k])
                v_m = v_var[k][0]
                f_m = f_var[k][0][begin:end]
                vt_m = vt_var[k][0][begin:end]
                # save changed object to .obj file
                nr.save_obj(
                    '/path/to/data/house/' + house[k] + '/attack_' +
                    str(int(idx[k])) + '_' + str(oid) + '.obj', v_m, f_m, vt_m)
            with torch.no_grad():
                model.eval()
                scores, att_probs = model(v_var, f_var, vt_var, pos,
                                          questions_var, 0, '0')
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)

        if all_envs_loaded == False:
            eval_loader.dataset._load_envs()
            if len(eval_loader.dataset.pruned_env_set) == 0:
                done = True
        else:
            done = True
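
The attack above is a single-step FGSM update restricted to the target object's texture rows: the texture is shifted by epsilon in the direction of the sign of the loss gradient. A minimal sketch of that update (function name and arguments are illustrative, not the listing's API):

import torch

def fgsm_step(vt, vt_grad, epsilon=12.0 / 255.0):
    # fast gradient sign method: move every texture value by +/- epsilon in the
    # direction that increases the loss
    return vt.detach() + epsilon * torch.sign(vt_grad)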
Exemplo n.º 12
0
def train(rank, args, shared_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmCnnAttentionModel(**model_kwargs)

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    shared_model.parameters()),
                             lr=args.learning_rate)

    train_loader_kwargs = {
        'questions_h5': args.train_h5,
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': 'train',
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.to_cache
    }

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': 1,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.to_cache
    }

    args.output_log_path = os.path.join(args.log_dir,
                                        'train_' + str(rank) + '.json')

    metrics = VqaMetric(
        info={
            'split': 'train',
            'thread': rank
        },
        metric_names=['loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_log_path)

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    train_loader = EqaDataLoader(**train_loader_kwargs)
    if args.input_type == 'ques,image':
        train_loader.dataset._load_envs(start_idx=0, in_order=True)

    print('train_loader has %d samples' % len(train_loader.dataset))

    t, epoch, best_eval_acc = 0, 0, 0

    while epoch < int(args.max_epochs):

        if args.input_type == 'ques':

            for batch in train_loader:

                t += 1

                model.load_state_dict(shared_model.state_dict())
                model.train()
                model.cuda()

                idx, questions, answers = batch

                questions_var = Variable(questions.cuda())
                answers_var = Variable(answers.cuda())

                scores = model(questions_var)
                loss = lossFn(scores, answers_var)

                # zero grad
                optim.zero_grad()

                # update metrics
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)
                metrics.update([loss.data[0], accuracy, ranks, 1.0 / ranks])

                # backprop and update
                loss.backward()

                ensure_shared_grads(model.cpu(), shared_model)
                optim.step()

                if t % args.print_every == 0:
                    print(metrics.get_stat_string())
                    if args.to_log == 1:
                        metrics.dump_log()

        elif args.input_type == 'ques,image':

            done = False
            all_envs_loaded = train_loader.dataset._check_if_all_envs_loaded()

            while done == False:

                for batch in train_loader:

                    t += 1

                    model.load_state_dict(shared_model.state_dict())
                    model.train()
                    model.cuda()

                    idx, questions, answers, images, _, _, _ = batch

                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    loss = lossFn(scores, answers_var)

                    # zero grad
                    optim.zero_grad()

                    # update metrics
                    # accuracy, ranks = metrics.compute_ranks(scores.data.cpu(), answers)
                    # metrics.update([loss.data[0], accuracy, ranks, 1.0 / ranks])

                    # backprop and update
                    loss.backward()

                    ensure_shared_grads(model.cpu(), shared_model)
                    optim.step()

                    #if t % args.print_every == 0:
                    #    print(metrics.get_stat_string())
                    #    if args.to_log == 1:
                    #        metrics.dump_log()

                if all_envs_loaded == False:
                    train_loader.dataset._load_envs(in_order=True)
                    if len(train_loader.dataset.pruned_env_set) == 0:
                        done = True
                else:
                    done = True

            env_done = False
            env_all_envs_loaded = eval_loader.dataset._check_if_all_envs_loaded(
            )

            while env_done == False:
                _loss, _accuracy, _ranks = None, None, None
                for batch in eval_loader:
                    t += 1

                    model.cuda()

                    idx, questions, answers, images, _, _, _ = batch

                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    loss = lossFn(scores, answers_var)

                    # update metrics
                    accuracy, ranks = metrics.compute_ranks(
                        scores.data.cpu(), answers)

                    # accumulate per-batch eval stats for this environment chunk
                    if _loss is None:
                        _loss = [loss.data[0]]
                        _accuracy = [accuracy]
                        _ranks = [ranks]
                    else:
                        _loss.append(loss.data[0])
                        _accuracy.append(accuracy)
                        _ranks.append(ranks)

                    metrics.update([loss.data[0], accuracy, ranks, 1.0 / ranks])

                print(metrics.get_stat_string(mode=0))

                if env_all_envs_loaded == False:
                    eval_loader.dataset._load_envs()
                    if len(eval_loader.dataset.pruned_env_set) == 0:
                        env_done = True
                else:
                    env_done = True

        epoch += 1

        # checkpoint if best val accuracy
        if metrics.metrics[1][0] > best_eval_acc:
            best_eval_acc = metrics.metrics[1][0]
            if epoch % args.eval_every == 0 and args.to_log == 1:
                metrics.dump_log()

                model_state = get_state(model)

                if args.checkpoint_path != False:
                    ad = checkpoint['args']
                else:
                    ad = args.__dict__

                checkpoint = {'args': ad, 'state': model_state, 'epoch': epoch}

                checkpoint_path = '%s/epoch_%d_accuracy_%.04f.pt' % (
                    args.checkpoint_dir, epoch, best_eval_acc)
                print('Saving checkpoint to %s' % checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

        print('[best_eval_accuracy:%.04f]' % best_eval_acc)
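
ensure_shared_grads is not defined in this listing; in Hogwild/A3C-style training it typically hands the worker's freshly computed gradients to the shared model so that optim.step() (built over shared_model.parameters()) applies them. A plausible sketch under that assumption:

def ensure_shared_grads(model, shared_model):
    # point the shared parameters' gradients at the local ones, unless the shared
    # model already has gradients from another worker
    for param, shared_param in zip(model.parameters(), shared_model.parameters()):
        if shared_param.grad is not None:
            return
        shared_param._grad = param.grad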
Exemplo n.º 13
0
def train(rank, args, shared_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.forget_rate is None:
        forget_rate = args.noise_rate
    else:
        forget_rate = args.forget_rate

    rate_schedule = np.ones(args.max_epochs) * forget_rate
    rate_schedule[:args.num_gradual] = np.linspace(0,
                                                   forget_rate**args.exponent,
                                                   args.num_gradual)

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmCnnAttentionModel(**model_kwargs)

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model1 = VqaLstmCnnAttentionModel(**model_kwargs)

    # needed by the 'ques' branch below; the 'ques,image' branch uses loss_coteaching instead
    lossFn = torch.nn.CrossEntropyLoss().cuda()

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    shared_model.parameters()),
                             lr=args.learning_rate)

    optim1 = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                     model1.parameters()),
                              lr=args.learning_rate)

    train_loader_kwargs = {
        'questions_h5': args.train_h5,
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': args.batch_size,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': 'train',
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.cache
    }

    args.output_log_path = os.path.join(args.log_dir,
                                        'train_' + str(rank) + '.json')

    metrics = VqaMetric(
        info={
            'split': 'train',
            'thread': rank
        },
        metric_names=['loss', 'accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_log_path)

    train_loader = EqaDataLoader(**train_loader_kwargs)
    if args.input_type == 'ques,image':
        train_loader.dataset._load_envs(start_idx=0, in_order=True)

    print('train_loader has %d samples' % len(train_loader.dataset))

    t, epoch = 0, 0

    while epoch < int(args.max_epochs):

        if args.input_type == 'ques':

            for batch in train_loader:

                t += 1

                model.load_state_dict(shared_model.state_dict())
                model.train()
                model.cuda()

                idx, questions, answers = batch

                questions_var = Variable(questions.cuda())
                answers_var = Variable(answers.cuda())

                scores = model(questions_var)
                loss = lossFn(scores, answers_var)

                # zero grad
                optim.zero_grad()

                # update metrics
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)
                metrics.update(
                    [loss.data.item(), accuracy, ranks, 1.0 / ranks])

                # backprop and update
                loss.backward()

                ensure_shared_grads(model.cpu(), shared_model)
                optim.step()

                if t % args.print_every == 0:
                    print(metrics.get_stat_string())
                    if args.log == True:
                        metrics.dump_log()

        elif args.input_type == 'ques,image':

            done = False
            all_envs_loaded = train_loader.dataset._check_if_all_envs_loaded()

            while done == False:

                for batch in train_loader:

                    t += 1

                    model.load_state_dict(shared_model.state_dict())
                    model.train()
                    model.cnn.eval()
                    model.cuda()

                    # model1.load_state_dict(shared_model1.state_dict())
                    model1.train()
                    model1.cnn.eval()
                    model1.cuda()

                    idx, questions, answers, images, _, _, _ = batch
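                    # inject synthetic label noise: with probability args.noise_rate, shift the
                    # answer index by a random offset, re-drawing the offset until the result
                    # stays inside the answer vocabulary range [0, 70]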
                    for i in range(len(answers)):
                        if random.random() < args.noise_rate:
                            tempt = random.randint(-10, 10)
                            answers[i] = answers[i] + tempt
                            while (answers[i] < 0 or answers[i] > 70):
                                answers[i] = answers[i] - tempt
                                tempt = random.randint(-10, 10)
                                answers[i] = answers[i] + tempt
                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    scores1, att_probs1 = model1(images_var, questions_var)
                    loss, loss1 = loss_coteaching(scores, scores1, answers_var,
                                                  rate_schedule[epoch])

                    # zero grad
                    optim.zero_grad()
                    optim1.zero_grad()

                    # update metrics
                    accuracy, ranks = metrics.compute_ranks(
                        scores.data.cpu(), answers)
                    metrics.update(
                        [loss.data.item(), accuracy, ranks, 1.0 / ranks])

                    # backprop and update
                    loss.backward()
                    loss1.backward()

                    ensure_shared_grads(model.cpu(), shared_model)
                    # ensure_shared_grads(model1.cpu(), shared_model1)
                    optim.step()
                    optim1.step()

                    if t % args.print_every == 0:
                        print(metrics.get_stat_string())
                        if args.log == True:
                            metrics.dump_log()

                if all_envs_loaded == False:
                    print('[CHECK][Cache:%d][Total:%d]' %
                          (len(train_loader.dataset.img_data_cache),
                           len(train_loader.dataset.env_list)))
                    train_loader.dataset._load_envs(in_order=True)
                    if len(train_loader.dataset.pruned_env_set) == 0:
                        done = True
                else:
                    done = True

        epoch += 1
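
loss_coteaching is not defined in this listing; in the co-teaching scheme each network keeps only the fraction (1 - forget_rate) of samples on which its peer reports the smallest loss and trains on those, so noisy labels are progressively filtered out. A minimal sketch under that assumption (argument names are illustrative):

import torch
import torch.nn.functional as F

def loss_coteaching(scores1, scores2, targets, forget_rate):
    # per-sample losses for both networks
    loss1 = F.cross_entropy(scores1, targets, reduction='none')
    loss2 = F.cross_entropy(scores2, targets, reduction='none')

    num_remember = max(1, int((1.0 - forget_rate) * targets.size(0)))

    # each network picks its small-loss samples ...
    idx1 = torch.argsort(loss1)[:num_remember]
    idx2 = torch.argsort(loss2)[:num_remember]

    # ... and the peer network is trained on them
    return loss1[idx2].mean(), loss2[idx1].mean()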
Exemplo n.º 14
0
def train(rank, args, shared_nav_model, shared_ans_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.model_type == 'pacman':

        model_kwargs = {'question_vocab': load_vocab(args.vocab_json)}
        nav_model = NavPlannerControllerModel(**model_kwargs)

    else:

        exit()

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    ans_model = VqaLstmCnnAttentionModel(**model_kwargs)

    optim = torch.optim.SGD(
        filter(lambda p: p.requires_grad, shared_nav_model.parameters()),
        lr=args.learning_rate)

    train_loader_kwargs = {
        'questions_h5': args.train_h5,
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'target_obj_conn_map_dir': args.target_obj_conn_map_dir,
        'map_resolution': args.map_resolution,
        'batch_size': 1,
        'input_type': args.model_type,
        'num_frames': 5,
        'split': 'train',
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.to_cache
    }

    args.output_nav_log_path = os.path.join(args.log_dir,
                                            'nav_train_' + str(rank) + '.json')
    args.output_ans_log_path = os.path.join(args.log_dir,
                                            'ans_train_' + str(rank) + '.json')

    nav_model.load_state_dict(shared_nav_model.state_dict())
    nav_model.cuda()

    ans_model.load_state_dict(shared_ans_model.state_dict())
    ans_model.eval()
    ans_model.cuda()

    nav_metrics = NavMetric(
        info={'split': 'train',
              'thread': rank},
        metric_names=[
            'planner_loss', 'controller_loss', 'reward', 'episode_length'
        ],
        log_json=args.output_nav_log_path)

    vqa_metrics = VqaMetric(
        info={'split': 'train',
              'thread': rank},
        metric_names=['accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_ans_log_path)

    train_loader = EqaDataLoader(**train_loader_kwargs)

    print('train_loader has %d samples' % len(train_loader.dataset))

    t, epoch = 0, 0
    p_losses, c_losses, reward_list, episode_length_list = [], [], [], []

    nav_metrics.update([10.0, 10.0, 0, 100])

    # fraction of the expert trajectory replayed before control is handed to the agent;
    # increased towards 1.0 later once the running reward passes a threshold
    mult = 0.1

    while epoch < int(args.max_epochs):

        if 'pacman' in args.model_type:

            planner_lossFn = MaskedNLLCriterion().cuda()
            controller_lossFn = MaskedNLLCriterion().cuda()

            done = False
            all_envs_loaded = train_loader.dataset._check_if_all_envs_loaded()

            while done == False:

                for batch in train_loader:

                    nav_model.load_state_dict(shared_nav_model.state_dict())
                    nav_model.eval()
                    nav_model.cuda()

                    idx, question, answer, actions, action_length = batch
                    metrics_slug = {}

                    h3d = train_loader.dataset.episode_house

                    # evaluate at multiple initializations
                    # for i in [10, 30, 50]:

                    t += 1

                    question_var = Variable(question.cuda())

                    controller_step = False
                    planner_hidden = nav_model.planner_nav_rnn.init_hidden(1)

                    # forward through planner till spawn
                    planner_actions_in, planner_img_feats, controller_step, controller_action_in, controller_img_feat, init_pos = train_loader.dataset.get_hierarchical_features_till_spawn(
                        actions[0, :action_length[0] + 1].numpy(),
                        max(3, int(mult * action_length[0])))

                    planner_actions_in_var = Variable(
                        planner_actions_in.cuda())
                    planner_img_feats_var = Variable(planner_img_feats.cuda())

                    for step in range(planner_actions_in.size(0)):

                        planner_scores, planner_hidden = nav_model.planner_step(
                            question_var, planner_img_feats_var[step].view(
                                1, 1, 3200), planner_actions_in_var[step].view(
                                    1, 1), planner_hidden)

                    if controller_step == True:

                        controller_img_feat_var = Variable(
                            controller_img_feat.cuda())
                        controller_action_in_var = Variable(
                            torch.LongTensor(1, 1).fill_(
                                int(controller_action_in)).cuda())

                        controller_scores = nav_model.controller_step(
                            controller_img_feat_var.view(1, 1, 3200),
                            controller_action_in_var.view(1, 1),
                            planner_hidden[0])

                        prob = F.softmax(controller_scores, dim=1)
                        controller_action = int(
                            prob.max(1)[1].data.cpu().numpy()[0])

                        if controller_action == 1:
                            controller_step = True
                        else:
                            controller_step = False

                        action = int(controller_action_in)
                        action_in = torch.LongTensor(
                            1, 1).fill_(action + 1).cuda()

                    else:

                        prob = F.softmax(planner_scores, dim=1)
                        action = int(prob.max(1)[1].data.cpu().numpy()[0])

                        action_in = torch.LongTensor(
                            1, 1).fill_(action + 1).cuda()

                    h3d.env.reset(
                        x=init_pos[0], y=init_pos[2], yaw=init_pos[3])

                    init_dist_to_target = h3d.get_dist_to_target(
                        h3d.env.cam.pos)
                    if init_dist_to_target < 0:  # unreachable
                        # invalids.append([idx[0], i])
                        continue

                    episode_length = 0
                    episode_done = True
                    controller_action_counter = 0

                    dists_to_target, pos_queue = [init_dist_to_target], [
                        init_pos
                    ]

                    rewards, planner_actions, planner_log_probs, controller_actions, controller_log_probs = [], [], [], [], []

                    if action != 3:

                        # take the first step
                        img, rwd, episode_done = h3d.step(action, step_reward=True)
                        img = torch.from_numpy(img.transpose(
                            2, 0, 1)).float() / 255.0
                        img_feat_var = train_loader.dataset.cnn(
                            Variable(img.view(1, 3, 224, 224).cuda())).view(
                                1, 1, 3200)

                        for step in range(args.max_episode_length):

                            episode_length += 1

                            if controller_step == False:
                                planner_scores, planner_hidden = nav_model.planner_step(
                                    question_var, img_feat_var,
                                    Variable(action_in), planner_hidden)

                                planner_prob = F.softmax(planner_scores, dim=1)
                                planner_log_prob = F.log_softmax(
                                    planner_scores, dim=1)

                                action = planner_prob.multinomial().data
                                planner_log_prob = planner_log_prob.gather(
                                    1, Variable(action))

                                planner_log_probs.append(
                                    planner_log_prob.cpu())

                                action = int(action.cpu().numpy()[0, 0])
                                planner_actions.append(action)

                            img, rwd, episode_done = h3d.step(action, step_reward=True)

                            episode_done = episode_done or episode_length >= args.max_episode_length

                            rewards.append(rwd)

                            img = torch.from_numpy(img.transpose(
                                2, 0, 1)).float() / 255.0
                            img_feat_var = train_loader.dataset.cnn(
                                Variable(img.view(1, 3, 224, 224)
                                         .cuda())).view(1, 1, 3200)

                            dists_to_target.append(
                                h3d.get_dist_to_target(h3d.env.cam.pos))
                            pos_queue.append([
                                h3d.env.cam.pos.x, h3d.env.cam.pos.y,
                                h3d.env.cam.pos.z, h3d.env.cam.yaw
                            ])

                            if episode_done == True:
                                break

                            # query controller to continue or not
                            controller_action_in = Variable(
                                torch.LongTensor(1, 1).fill_(action).cuda())
                            controller_scores = nav_model.controller_step(
                                img_feat_var, controller_action_in,
                                planner_hidden[0])

                            controller_prob = F.softmax(
                                controller_scores, dim=1)
                            controller_log_prob = F.log_softmax(
                                controller_scores, dim=1)

                            controller_action = controller_prob.multinomial(
                            ).data

                            if int(controller_action[0]
                                   ) == 1 and controller_action_counter < 4:
                                controller_action_counter += 1
                                controller_step = True
                            else:
                                controller_action_counter = 0
                                controller_step = False
                                controller_action.fill_(0)

                            controller_log_prob = controller_log_prob.gather(
                                1, Variable(controller_action))
                            controller_log_probs.append(
                                controller_log_prob.cpu())

                            controller_action = int(
                                controller_action.cpu().numpy()[0, 0])
                            controller_actions.append(controller_action)
                            action_in = torch.LongTensor(
                                1, 1).fill_(action + 1).cuda()

                    # run answerer here
                    ans_acc = [0]
                    if action == 3:
                        if len(pos_queue) < 5:
                            pos_queue = train_loader.dataset.episode_pos_queue[len(
                                pos_queue) - 5:] + pos_queue
                        images = train_loader.dataset.get_frames(
                            h3d, pos_queue[-5:], preprocess=True)
                        images_var = Variable(
                            torch.from_numpy(images).cuda()).view(
                                1, 5, 3, 224, 224)
                        scores, att_probs = ans_model(images_var, question_var)
                        ans_acc, ans_rank = vqa_metrics.compute_ranks(
                            scores.data.cpu(), answer)
                        vqa_metrics.update([ans_acc, ans_rank, 1.0 / ans_rank])

                    rewards.append(h3d.success_reward * ans_acc[0])

                    # REINFORCE-style credit assignment: accumulate the discounted return R
                    # (gamma = 0.99) backwards through the episode and use the running mean
                    # reward (nav_metrics.metrics[2][1]) as a baseline for the advantage
                    R = torch.zeros(1, 1)

                    planner_loss = 0
                    controller_loss = 0

                    planner_rev_idx = -1
                    for i in reversed(range(len(rewards))):
                        R = 0.99 * R + rewards[i]
                        advantage = R - nav_metrics.metrics[2][1]

                        if i < len(controller_actions):
                            controller_loss = controller_loss - controller_log_probs[i] * Variable(
                                advantage)

                            if controller_actions[i] == 0 and planner_rev_idx + len(planner_log_probs) >= 0:
                                planner_loss = planner_loss - planner_log_probs[planner_rev_idx] * Variable(
                                    advantage)
                                planner_rev_idx -= 1

                        elif planner_rev_idx + len(planner_log_probs) >= 0:

                            planner_loss = planner_loss - planner_log_probs[planner_rev_idx] * Variable(
                                advantage)
                            planner_rev_idx -= 1

                    controller_loss /= max(1, len(controller_log_probs))
                    planner_loss /= max(1, len(planner_log_probs))

                    optim.zero_grad()

                    if isinstance(planner_loss, float) == False and isinstance(
                            controller_loss, float) == False:
                        p_losses.append(planner_loss.data[0, 0])
                        c_losses.append(controller_loss.data[0, 0])
                        reward_list.append(np.sum(rewards))
                        episode_length_list.append(episode_length)

                        (planner_loss + controller_loss).backward()

                        ensure_shared_grads(nav_model.cpu(), shared_nav_model)
                        optim.step()

                    if len(reward_list) > 50:

                        nav_metrics.update([
                            p_losses, c_losses, reward_list,
                            episode_length_list
                        ])

                        print(nav_metrics.get_stat_string())
                        if args.to_log == 1:
                            nav_metrics.dump_log()

                        if nav_metrics.metrics[2][1] > 0.35:
                            mult = min(mult + 0.1, 1.0)

                        p_losses, c_losses, reward_list, episode_length_list = [], [], [], []

                if all_envs_loaded == False:
                    train_loader.dataset._load_envs(in_order=True)
                    if len(train_loader.dataset.pruned_env_set) == 0:
                        done = True
                        if args.to_cache == False:
                            train_loader.dataset._load_envs(
                                start_idx=0, in_order=True)
                else:
                    done = True

        epoch += 1
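
A compact sketch of the policy-gradient loss built in the reversed loop above, simplified to a single action stream (the listing splits it across planner and controller log-probabilities) and assuming per-step rewards, log-probabilities, and a scalar baseline are available:

def reinforce_loss(rewards, log_probs, baseline, gamma=0.99):
    # accumulate the discounted return from the end of the episode backwards and
    # weight each log-probability by the resulting advantage
    loss, R = 0.0, 0.0
    for r, log_prob in zip(reversed(rewards), reversed(log_probs)):
        R = gamma * R + r
        loss = loss - log_prob * (R - baseline)
    return loss / max(1, len(log_probs))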
Exemplo n.º 15
0
def eval(rank, args, shared_nav_model, shared_ans_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.model_type == 'pacman':

        model_kwargs = {'question_vocab': load_vocab(args.vocab_json)}
        nav_model = NavPlannerControllerModel(**model_kwargs)

    else:

        exit()

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    ans_model = VqaLstmCnnAttentionModel(**model_kwargs)

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'target_obj_conn_map_dir': args.target_obj_conn_map_dir,
        'map_resolution': args.map_resolution,
        'batch_size': 1,
        'input_type': args.model_type,
        'num_frames': 5,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': False
    }

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_nav_log_path = os.path.join(args.log_dir,
                                            'nav_eval_' + str(rank) + '.json')
    args.output_ans_log_path = os.path.join(args.log_dir,
                                            'ans_eval_' + str(rank) + '.json')

    t, epoch, best_eval_acc = 0, 0, 0.0

    while epoch < int(args.max_epochs):

        start_time = time.time()
        invalids = []

        nav_model.load_state_dict(shared_nav_model.state_dict())
        nav_model.eval()

        ans_model.load_state_dict(shared_ans_model.state_dict())
        ans_model.eval()
        ans_model.cuda()

        # that's a lot of numbers
        nav_metrics = NavMetric(
            info={
                'split': args.eval_split,
                'thread': rank
            },
            metric_names=[
                'd_0_10', 'd_0_30', 'd_0_50', 'd_T_10', 'd_T_30', 'd_T_50',
                'd_D_10', 'd_D_30', 'd_D_50', 'd_min_10', 'd_min_30',
                'd_min_50', 'r_T_10', 'r_T_30', 'r_T_50', 'r_e_10', 'r_e_30',
                'r_e_50', 'stop_10', 'stop_30', 'stop_50', 'ep_len_10',
                'ep_len_30', 'ep_len_50'
            ],
            log_json=args.output_nav_log_path)

        vqa_metrics = VqaMetric(
            info={
                'split': args.eval_split,
                'thread': rank
            },
            metric_names=[
                'accuracy_10', 'accuracy_30', 'accuracy_50', 'mean_rank_10',
                'mean_rank_30', 'mean_rank_50', 'mean_reciprocal_rank_10',
                'mean_reciprocal_rank_30', 'mean_reciprocal_rank_50'
            ],
            log_json=args.output_ans_log_path)

        if 'pacman' in args.model_type:

            done = False

            while done == False:

                for batch in tqdm(eval_loader):

                    nav_model.load_state_dict(shared_nav_model.state_dict())
                    nav_model.eval()
                    nav_model.cuda()

                    idx, question, answer, actions, action_length = batch
                    metrics_slug = {}

                    h3d = eval_loader.dataset.episode_house

                    # evaluate at multiple initializations
                    for i in [10, 30, 50]:

                        t += 1

                        if i > action_length[0]:
                            invalids.append([idx[0], i])
                            continue

                        question_var = Variable(question.cuda())

                        controller_step = False
                        planner_hidden = nav_model.planner_nav_rnn.init_hidden(
                            1)

                        # forward through planner till spawn
                        planner_actions_in, planner_img_feats, controller_step, controller_action_in, controller_img_feat, init_pos = eval_loader.dataset.get_hierarchical_features_till_spawn(
                            actions[0, :action_length[0] + 1].numpy(), i)

                        planner_actions_in_var = Variable(
                            planner_actions_in.cuda())
                        planner_img_feats_var = Variable(
                            planner_img_feats.cuda())

                        for step in range(planner_actions_in.size(0)):

                            planner_scores, planner_hidden = nav_model.planner_step(
                                question_var,
                                planner_img_feats_var[step].view(1, 1, 3200),
                                planner_actions_in_var[step].view(1, 1),
                                planner_hidden)

                        if controller_step == True:

                            controller_img_feat_var = Variable(
                                controller_img_feat.cuda())
                            controller_action_in_var = Variable(
                                torch.LongTensor(1, 1).fill_(
                                    int(controller_action_in)).cuda())

                            controller_scores = nav_model.controller_step(
                                controller_img_feat_var.view(1, 1, 3200),
                                controller_action_in_var.view(1, 1),
                                planner_hidden[0])

                            prob = F.softmax(controller_scores, dim=1)
                            controller_action = int(
                                prob.max(1)[1].data.cpu().numpy()[0])

                            if controller_action == 1:
                                controller_step = True
                            else:
                                controller_step = False

                            action = int(controller_action_in)
                            action_in = torch.LongTensor(1, 1).fill_(action +
                                                                     1).cuda()

                        else:

                            prob = F.softmax(planner_scores, dim=1)
                            action = int(prob.max(1)[1].data.cpu().numpy()[0])

                            action_in = torch.LongTensor(1, 1).fill_(action +
                                                                     1).cuda()

                        h3d.env.reset(x=init_pos[0],
                                      y=init_pos[2],
                                      yaw=init_pos[3])

                        init_dist_to_target = h3d.get_dist_to_target(
                            h3d.env.cam.pos)
                        if init_dist_to_target < 0:  # unreachable
                            invalids.append([idx[0], i])
                            continue

                        episode_length = 0
                        episode_done = True
                        controller_action_counter = 0

                        dists_to_target, pos_queue, pred_actions = [
                            init_dist_to_target
                        ], [init_pos], []
                        planner_actions, controller_actions = [], []

                        if action != 3:

                            # take the first step
                            img, _, _ = h3d.step(action)
                            img = torch.from_numpy(img.transpose(
                                2, 0, 1)).float() / 255.0
                            img_feat_var = eval_loader.dataset.cnn(
                                Variable(img.view(1, 3, 224,
                                                  224).cuda())).view(
                                                      1, 1, 3200)

                            for step in range(args.max_episode_length):

                                episode_length += 1

                                if controller_step == False:
                                    planner_scores, planner_hidden = nav_model.planner_step(
                                        question_var, img_feat_var,
                                        Variable(action_in), planner_hidden)

                                    prob = F.softmax(planner_scores, dim=1)
                                    action = int(
                                        prob.max(1)[1].data.cpu().numpy()[0])
                                    planner_actions.append(action)

                                pred_actions.append(action)
                                img, _, episode_done = h3d.step(action)

                                episode_done = episode_done or episode_length >= args.max_episode_length

                                img = torch.from_numpy(img.transpose(
                                    2, 0, 1)).float() / 255.0
                                img_feat_var = eval_loader.dataset.cnn(
                                    Variable(img.view(1, 3, 224,
                                                      224).cuda())).view(
                                                          1, 1, 3200)

                                dists_to_target.append(
                                    h3d.get_dist_to_target(h3d.env.cam.pos))
                                pos_queue.append([
                                    h3d.env.cam.pos.x, h3d.env.cam.pos.y,
                                    h3d.env.cam.pos.z, h3d.env.cam.yaw
                                ])

                                if episode_done == True:
                                    break

                                # query controller to continue or not
                                controller_action_in = Variable(
                                    torch.LongTensor(1,
                                                     1).fill_(action).cuda())
                                controller_scores = nav_model.controller_step(
                                    img_feat_var, controller_action_in,
                                    planner_hidden[0])

                                prob = F.softmax(controller_scores, dim=1)
                                controller_action = int(
                                    prob.max(1)[1].data.cpu().numpy()[0])

                                if controller_action == 1 and controller_action_counter < 4:
                                    controller_action_counter += 1
                                    controller_step = True
                                else:
                                    controller_action_counter = 0
                                    controller_step = False
                                    controller_action = 0

                                controller_actions.append(controller_action)

                                action_in = torch.LongTensor(
                                    1, 1).fill_(action + 1).cuda()

                        # run answerer here
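                        # the answering model consumes the last 5 frames; if the agent stopped in
                        # fewer than 5 steps, pad with positions from the dataset's stored episode
                        # trajectory before extracting frames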
                        if len(pos_queue) < 5:
                            pos_queue = eval_loader.dataset.episode_pos_queue[
                                len(pos_queue) - 5:] + pos_queue
                        images = eval_loader.dataset.get_frames(
                            h3d, pos_queue[-5:], preprocess=True)
                        images_var = Variable(
                            torch.from_numpy(images).cuda()).view(
                                1, 5, 3, 224, 224)
                        scores, att_probs = ans_model(images_var, question_var)
                        ans_acc, ans_rank = vqa_metrics.compute_ranks(
                            scores.data.cpu(), answer)

                        pred_answer = scores.max(1)[1].data[0]

                        print(
                            '[Q_GT]', ' '.join([
                                eval_loader.dataset.vocab['questionIdxToToken']
                                [x] for x in question[0] if x != 0
                            ]))
                        print(
                            '[A_GT]',
                            eval_loader.dataset.vocab['answerIdxToToken'][
                                answer[0]])
                        print(
                            '[A_PRED]',
                            eval_loader.dataset.vocab['answerIdxToToken']
                            [pred_answer])

                        # compute stats
                        metrics_slug['accuracy_' + str(i)] = ans_acc[0]
                        metrics_slug['mean_rank_' + str(i)] = ans_rank[0]
                        metrics_slug['mean_reciprocal_rank_' +
                                     str(i)] = 1.0 / ans_rank[0]

                        metrics_slug['d_0_' + str(i)] = dists_to_target[0]
                        metrics_slug['d_T_' + str(i)] = dists_to_target[-1]
                        metrics_slug[
                            'd_D_' +
                            str(i)] = dists_to_target[0] - dists_to_target[-1]
                        metrics_slug['d_min_' +
                                     str(i)] = np.array(dists_to_target).min()
                        metrics_slug['ep_len_' + str(i)] = episode_length
                        if action == 3:
                            metrics_slug['stop_' + str(i)] = 1
                        else:
                            metrics_slug['stop_' + str(i)] = 0
                        inside_room = []
                        for p in pos_queue:
                            inside_room.append(
                                h3d.is_inside_room(
                                    p, eval_loader.dataset.target_room))
                        if inside_room[-1] == True:
                            metrics_slug['r_T_' + str(i)] = 1
                        else:
                            metrics_slug['r_T_' + str(i)] = 0
                        if any([x == True for x in inside_room]) == True:
                            metrics_slug['r_e_' + str(i)] = 1
                        else:
                            metrics_slug['r_e_' + str(i)] = 0

                    # navigation metrics
                    metrics_list = []
                    for i in nav_metrics.metric_names:
                        if i not in metrics_slug:
                            metrics_list.append(nav_metrics.metrics[
                                nav_metrics.metric_names.index(i)][0])
                        else:
                            metrics_list.append(metrics_slug[i])

                    nav_metrics.update(metrics_list)

                    # vqa metrics
                    metrics_list = []
                    for i in vqa_metrics.metric_names:
                        if i not in metrics_slug:
                            metrics_list.append(vqa_metrics.metrics[
                                vqa_metrics.metric_names.index(i)][0])
                        else:
                            metrics_list.append(metrics_slug[i])

                    vqa_metrics.update(metrics_list)

                try:
                    print(nav_metrics.get_stat_string(mode=0))
                    print(vqa_metrics.get_stat_string(mode=0))
                except Exception:
                    # stats may be empty if every episode in this chunk was skipped as invalid
                    pass

                print('epoch', epoch)
                print('invalids', len(invalids))

                eval_loader.dataset._load_envs()
                if len(eval_loader.dataset.pruned_env_set) == 0:
                    done = True

        epoch += 1

        # checkpoint if best val accuracy
        if vqa_metrics.metrics[2][0] > best_eval_acc:  # ans_acc_50
            best_eval_acc = vqa_metrics.metrics[2][0]
            if epoch % args.eval_every == 0 and args.to_log == 1:
                vqa_metrics.dump_log()
                nav_metrics.dump_log()

                model_state = get_state(nav_model)

                aad = dict(args.__dict__)
                ad = {}
                for i in aad:
                    if i[0] != '_':
                        ad[i] = aad[i]

                checkpoint = {'args': ad, 'state': model_state, 'epoch': epoch}

                checkpoint_path = '%s/epoch_%d_ans_50_%.04f.pt' % (
                    args.checkpoint_dir, epoch, best_eval_acc)
                print('Saving checkpoint to %s' % checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

        print('[best_eval_ans_acc_50:%.04f]' % best_eval_acc)

        eval_loader.dataset._load_envs(start_idx=0, in_order=True)
Exemplo n.º 16
0
def eval(rank, args, shared_model, use_vision, use_language):

    gpu_idx = args.gpus.index(args.gpus[rank % len(args.gpus)])
    torch.cuda.set_device(gpu_idx)
    print("eval gpu:" + str(gpu_idx) + " assigned")

    if args.input_type == 'ques':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmModel(**model_kwargs)

    elif args.input_type == 'ques,image':

        model_kwargs = {'vocab': load_vocab(args.vocab_json)}
        model = VqaLstmCnnAttentionModel(**model_kwargs)

    lossFn = torch.nn.CrossEntropyLoss().cuda()

    eval_loader_kwargs = {
        'questions_h5': getattr(args, args.eval_split + '_h5'),
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'batch_size': 1,
        'input_type': args.input_type,
        'num_frames': args.num_frames,
        'split': args.eval_split,
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.to_cache
    }

    eval_loader = EqaDataLoader(**eval_loader_kwargs)
    print('eval_loader has %d samples' % len(eval_loader.dataset))

    args.output_log_path = os.path.join(args.log_dir,
                                        'eval_' + str(rank) + '.json')

    t, epoch, best_eval_acc = 0, 0, 0

    print(epoch, args.max_epochs)  # DEBUG
    while epoch < int(args.max_epochs):
        print("eval gpu:" + str(gpu_idx) + " running epoch " + str(epoch))

        model.load_state_dict(shared_model.state_dict())
        model.eval()

        metrics = VqaMetric(info={'split': args.eval_split},
                            metric_names=[
                                'loss', 'accuracy', 'mean_rank',
                                'mean_reciprocal_rank'
                            ],
                            log_json=args.output_log_path)

        if args.input_type == 'ques':
            for batch in eval_loader:
                t += 1

                model.cuda()

                idx, questions, answers = batch

                # If not using language, replace each question with a start and end token back to back.
                if not use_language:
                    questions = torch.zeros_like(questions)
                    questions.fill_(
                        model_kwargs['vocab']['questionTokenToIdx']['<NULL>'])
                    questions[:, 0] = model_kwargs['vocab'][
                        'questionTokenToIdx']['<START>']
                    questions[:, 1] = model_kwargs['vocab'][
                        'questionTokenToIdx']['<END>']

                questions_var = Variable(questions.cuda())
                answers_var = Variable(answers.cuda())

                scores = model(questions_var)
                loss = lossFn(scores, answers_var)

                # update metrics
                accuracy, ranks = metrics.compute_ranks(
                    scores.data.cpu(), answers)
                metrics.update([loss.data[0], accuracy, ranks, 1.0 / ranks])

            print(metrics.get_stat_string(mode=0))

        elif args.input_type == 'ques,image':
            done = False
            all_envs_loaded = eval_loader.dataset._check_if_all_envs_loaded()

            while done == False:
                for batch in eval_loader:
                    t += 1

                    model.cuda()

                    idx, questions, answers, images, _, _, _ = batch

                    # If not using language, replace each question with a start and end token back to back.
                    if not use_language:
                        questions = torch.zeros_like(questions)
                        questions.fill_(model_kwargs['vocab']
                                        ['questionTokenToIdx']['<NULL>'])
                        questions[:, 0] = model_kwargs['vocab'][
                            'questionTokenToIdx']['<START>']
                        questions[:, 1] = model_kwargs['vocab'][
                            'questionTokenToIdx']['<END>']
                    # If not using vision, replace all image feature data with zeros.
                    if not use_vision:
                        images = torch.zeros_like(images)

                    questions_var = Variable(questions.cuda())
                    answers_var = Variable(answers.cuda())
                    images_var = Variable(images.cuda())

                    scores, att_probs = model(images_var, questions_var)
                    loss = lossFn(scores, answers_var)

                    # update metrics
                    accuracy, ranks = metrics.compute_ranks(
                        scores.data.cpu(), answers)
                    metrics.update(
                        [loss.data[0], accuracy, ranks, 1.0 / ranks])

                print(metrics.get_stat_string(mode=0))

                if all_envs_loaded == False:
                    eval_loader.dataset._load_envs()
                    if len(eval_loader.dataset.pruned_env_set) == 0:
                        done = True
                else:
                    done = True

        # synchronize with the training process through a shared temp file: block here until
        # the train thread reports an epoch beyond the current one
        read_epoch = None
        while read_epoch is None or epoch >= read_epoch:
            try:
                with open(args.identifier + '.shared_epoch.tmp', 'r') as f:
                    read_epoch = int(f.read().strip())
            except (IOError, ValueError):
                pass
            if read_epoch is None or epoch >= read_epoch:
                # TODO: since merger, this no longer works (hanging); might need to undo changes re: threading that we
                # TODO: made or debug them.
                print("eval gpu:" + str(gpu_idx) +
                      " waiting for train thread to finish epoch " +
                      str(epoch))
                time.sleep(
                    10
                )  # sleep until the training thread finishes another iteration
        epoch = read_epoch

        # checkpoint if best val accuracy
        if metrics.metrics[1][0] > best_eval_acc:
            best_eval_acc = metrics.metrics[1][0]
            if epoch % args.eval_every == 0 and args.to_log == 1:
                metrics.dump_log()

                model_state = get_state(model)

                if args.checkpoint_path != False:
                    ad = checkpoint['args']
                else:
                    ad = args.__dict__

                checkpoint = {'args': ad, 'state': model_state, 'epoch': epoch}

                checkpoint_path = '%s/epoch_%d_accuracy_%.04f.pt' % (
                    args.checkpoint_dir, epoch, best_eval_acc)
                print('Saving checkpoint to %s' % checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

        print('[best_eval_accuracy:%.04f]' % best_eval_acc)
Exemplo n.º 17
0
def train(rank, args, shared_nav_model, shared_ans_model):

    torch.cuda.set_device(args.gpus.index(args.gpus[rank % len(args.gpus)]))

    if args.model_type == 'pacman':

        model_kwargs = {'question_vocab': load_vocab(args.vocab_json)}
        nav_model = NavPlannerControllerModel(**model_kwargs)

    else:

        exit()

    model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    ans_model = VqaLstmCnnAttentionModel(**model_kwargs)

    optim = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                   shared_nav_model.parameters()),
                            lr=args.learning_rate)

    train_loader_kwargs = {
        'questions_h5': args.train_h5,
        'data_json': args.data_json,
        'vocab': args.vocab_json,
        'target_obj_conn_map_dir': args.target_obj_conn_map_dir,
        'map_resolution': args.map_resolution,
        'batch_size': 1,
        'input_type': args.model_type,
        'num_frames': 5,
        'split': 'train',
        'max_threads_per_gpu': args.max_threads_per_gpu,
        'gpu_id': args.gpus[rank % len(args.gpus)],
        'to_cache': args.to_cache
    }

    args.output_nav_log_path = os.path.join(args.log_dir,
                                            'nav_train_' + str(rank) + '.json')
    args.output_ans_log_path = os.path.join(args.log_dir,
                                            'ans_train_' + str(rank) + '.json')

    nav_model.load_state_dict(shared_nav_model.state_dict())
    nav_model.cuda()

    ans_model.load_state_dict(shared_ans_model.state_dict())
    ans_model.eval()
    ans_model.cuda()

    nav_metrics = NavMetric(info={
        'split': 'train',
        'thread': rank
    },
                            metric_names=[
                                'planner_loss', 'controller_loss', 'reward',
                                'episode_length'
                            ],
                            log_json=args.output_nav_log_path)

    vqa_metrics = VqaMetric(
        info={
            'split': 'train',
            'thread': rank
        },
        metric_names=['accuracy', 'mean_rank', 'mean_reciprocal_rank'],
        log_json=args.output_ans_log_path)

    train_loader = EqaDataLoader(**train_loader_kwargs)

    print('train_loader has %d samples' % len(train_loader.dataset))

    t, epoch = 0, 0
    p_losses, c_losses, reward_list, episode_length_list = [], [], [], []

    nav_metrics.update([10.0, 10.0, 0, 100])

    mult = 0.1

    while epoch < int(args.max_epochs):

        if 'pacman' in args.model_type:

            planner_lossFn = MaskedNLLCriterion().cuda()
            controller_lossFn = MaskedNLLCriterion().cuda()

            done = False
            all_envs_loaded = train_loader.dataset._check_if_all_envs_loaded()

            while done == False:

                for batch in train_loader:

                    nav_model.load_state_dict(shared_nav_model.state_dict())
                    nav_model.eval()
                    nav_model.cuda()

                    idx, question, answer, actions, action_length = batch
                    metrics_slug = {}

                    h3d = train_loader.dataset.episode_house

                    # evaluate at multiple initializations
                    # for i in [10, 30, 50]:

                    t += 1

                    question_var = Variable(question.cuda())

                    controller_step = False
                    planner_hidden = nav_model.planner_nav_rnn.init_hidden(1)

                    # forward through the planner until the spawn point
                    # (max(3, mult * expert trajectory length) steps), collecting
                    # teacher-forcing inputs for the planner and controller
                    planner_actions_in, planner_img_feats, controller_step, controller_action_in, controller_img_feat, init_pos = train_loader.dataset.get_hierarchical_features_till_spawn(
                        actions[0, :action_length[0] + 1].numpy(),
                        max(3, int(mult * action_length[0])))

                    planner_actions_in_var = Variable(
                        planner_actions_in.cuda())
                    planner_img_feats_var = Variable(planner_img_feats.cuda())
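
                    # warm up the planner's hidden state by teacher-forcing the
                    # replayed prefix of the expert trajectory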

                    for step in range(planner_actions_in.size(0)):

                        planner_scores, planner_hidden = nav_model.planner_step(
                            question_var,
                            planner_img_feats_var[step].view(1, 1, 3200),
                            planner_actions_in_var[step].view(1, 1),
                            planner_hidden)
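
                    # choose the first post-spawn action: if the spawn point fell
                    # inside a controller segment, query the controller first,
                    # otherwise take the planner's argmax action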

                    if controller_step:

                        controller_img_feat_var = Variable(
                            controller_img_feat.cuda())
                        controller_action_in_var = Variable(
                            torch.LongTensor(1, 1).fill_(
                                int(controller_action_in)).cuda())

                        controller_scores = nav_model.controller_step(
                            controller_img_feat_var.view(1, 1, 3200),
                            controller_action_in_var.view(1, 1),
                            planner_hidden[0])

                        prob = F.softmax(controller_scores, dim=1)
                        controller_action = int(
                            prob.max(1)[1].data.cpu().numpy()[0])

                        controller_step = (controller_action == 1)

                        action = int(controller_action_in)
                        action_in = torch.LongTensor(1, 1).fill_(action +
                                                                 1).cuda()

                    else:

                        prob = F.softmax(planner_scores, dim=1)
                        action = int(prob.max(1)[1].data.cpu().numpy()[0])

                        action_in = torch.LongTensor(1, 1).fill_(action +
                                                                 1).cuda()

                    h3d.env.reset(x=init_pos[0],
                                  y=init_pos[2],
                                  yaw=init_pos[3])

                    init_dist_to_target = h3d.get_dist_to_target(
                        h3d.env.cam.pos)
                    if init_dist_to_target < 0:  # unreachable
                        # invalids.append([idx[0], i])
                        continue

                    episode_length = 0
                    episode_done = True
                    controller_action_counter = 0

                    dists_to_target, pos_queue = [init_dist_to_target], [init_pos]

                    rewards, planner_actions, planner_log_probs, controller_actions, controller_log_probs = [], [], [], [], []
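
                    # action 3 ends navigation (the answerer is queried below);
                    # only roll out the episode if the agent did not stop at once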

                    if action != 3:

                        # take the first step
                        img, rwd, episode_done = h3d.step(action,
                                                          step_reward=True)
                        img = torch.from_numpy(img.transpose(
                            2, 0, 1)).float() / 255.0
                        img_feat_var = train_loader.dataset.cnn(
                            Variable(img.view(1, 3, 224,
                                              224).cuda())).view(1, 1, 3200)

                        for step in range(args.max_episode_length):

                            episode_length += 1
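
                            # if the controller has not taken over, sample the
                            # next action from the planner and keep its log-prob
                            # for the REINFORCE update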

                            if not controller_step:
                                planner_scores, planner_hidden = nav_model.planner_step(
                                    question_var, img_feat_var,
                                    Variable(action_in), planner_hidden)

                                planner_prob = F.softmax(planner_scores, dim=1)
                                planner_log_prob = F.log_softmax(
                                    planner_scores, dim=1)

                                action = planner_prob.multinomial().data
                                planner_log_prob = planner_log_prob.gather(
                                    1, Variable(action))

                                planner_log_probs.append(
                                    planner_log_prob.cpu())

                                action = int(action.cpu().numpy()[0, 0])
                                planner_actions.append(action)

                            img, rwd, episode_done = h3d.step(action,
                                                              step_reward=True)

                            episode_done = episode_done or episode_length >= args.max_episode_length

                            rewards.append(rwd)

                            img = torch.from_numpy(img.transpose(
                                2, 0, 1)).float() / 255.0
                            img_feat_var = train_loader.dataset.cnn(
                                Variable(img.view(1, 3, 224,
                                                  224).cuda())).view(
                                                      1, 1, 3200)

                            dists_to_target.append(
                                h3d.get_dist_to_target(h3d.env.cam.pos))
                            pos_queue.append([
                                h3d.env.cam.pos.x, h3d.env.cam.pos.y,
                                h3d.env.cam.pos.z, h3d.env.cam.yaw
                            ])

                            if episode_done:
                                break

                            # query controller to continue or not
                            controller_action_in = Variable(
                                torch.LongTensor(1, 1).fill_(action).cuda())
                            controller_scores = nav_model.controller_step(
                                img_feat_var, controller_action_in,
                                planner_hidden[0])

                            controller_prob = F.softmax(controller_scores,
                                                        dim=1)
                            controller_log_prob = F.log_softmax(
                                controller_scores, dim=1)

                            controller_action = controller_prob.multinomial(
                            ).data
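
                            # the controller may repeat the planner's last action
                            # at most 4 consecutive times before control is forced
                            # back to the planner (its action is reset to 0)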

                            if int(controller_action[0]
                                   ) == 1 and controller_action_counter < 4:
                                controller_action_counter += 1
                                controller_step = True
                            else:
                                controller_action_counter = 0
                                controller_step = False
                                controller_action.fill_(0)

                            controller_log_prob = controller_log_prob.gather(
                                1, Variable(controller_action))
                            controller_log_probs.append(
                                controller_log_prob.cpu())

                            controller_action = int(
                                controller_action.cpu().numpy()[0, 0])
                            controller_actions.append(controller_action)
                            action_in = torch.LongTensor(1, 1).fill_(action +
                                                                     1).cuda()

                    # if the agent stopped, run the answerer on the last 5 frames
                    # (padded with the expert trajectory when the episode is shorter)
                    ans_acc = [0]
                    if action == 3:
                        if len(pos_queue) < 5:
                            pos_queue = train_loader.dataset.episode_pos_queue[
                                len(pos_queue) - 5:] + pos_queue
                        images = train_loader.dataset.get_frames(
                            h3d, pos_queue[-5:], preprocess=True)
                        images_var = Variable(
                            torch.from_numpy(images).cuda()).view(
                                1, 5, 3, 224, 224)
                        scores, att_probs = ans_model(images_var, question_var)
                        ans_acc, ans_rank = vqa_metrics.compute_ranks(
                            scores.data.cpu(), answer)
                        vqa_metrics.update([ans_acc, ans_rank, 1.0 / ans_rank])
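
                    # terminal reward is scaled by answer accuracy; ans_acc stays
                    # [0] when the agent never stopped to answer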

                    rewards.append(h3d.success_reward * ans_acc[0])

                    R = torch.zeros(1, 1)

                    planner_loss = 0
                    controller_loss = 0
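
                    # REINFORCE: walk the episode backwards accumulating the
                    # discounted return (gamma = 0.99); the advantage subtracts
                    # the baseline reward statistic. Controller steps always
                    # receive credit; planner log-probs are consumed from the end
                    # whenever the controller handed control back (action 0) or
                    # before the controller was used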

                    planner_rev_idx = -1
                    for i in reversed(range(len(rewards))):
                        R = 0.99 * R + rewards[i]
                        advantage = R - nav_metrics.metrics[2][1]

                        if i < len(controller_actions):
                            controller_loss = (controller_loss -
                                               controller_log_probs[i] *
                                               Variable(advantage))

                            if (controller_actions[i] == 0 and
                                    planner_rev_idx + len(planner_log_probs) >= 0):
                                planner_loss = (planner_loss -
                                                planner_log_probs[planner_rev_idx] *
                                                Variable(advantage))
                                planner_rev_idx -= 1

                        elif planner_rev_idx + len(planner_log_probs) >= 0:

                            planner_loss = (planner_loss -
                                            planner_log_probs[planner_rev_idx] *
                                            Variable(advantage))
                            planner_rev_idx -= 1

                    controller_loss /= max(1, len(controller_log_probs))
                    planner_loss /= max(1, len(planner_log_probs))

                    optim.zero_grad()

                    # skip the update when a loss is still a plain float,
                    # i.e. no log-probability terms were accumulated for it
                    if (not isinstance(planner_loss, float)
                            and not isinstance(controller_loss, float)):
                        p_losses.append(planner_loss.data[0, 0])
                        c_losses.append(controller_loss.data[0, 0])
                        reward_list.append(np.sum(rewards))
                        episode_length_list.append(episode_length)

                        (planner_loss + controller_loss).backward()

                        ensure_shared_grads(nav_model.cpu(), shared_nav_model)
                        optim.step()

                    if len(reward_list) > 50:

                        nav_metrics.update([
                            p_losses, c_losses, reward_list,
                            episode_length_list
                        ])

                        print(nav_metrics.get_stat_string())
                        if args.to_log == 1:
                            nav_metrics.dump_log()

                        if nav_metrics.metrics[2][1] > 0.35:
                            mult = min(mult + 0.1, 1.0)

                        p_losses, c_losses, reward_list, episode_length_list = [], [], [], []

                # rotate in the next chunk of environments; the epoch ends once
                # every environment has been visited (and, when not caching,
                # the loader is reset to the first chunk)
                if not all_envs_loaded:
                    train_loader.dataset._load_envs(in_order=True)
                    if len(train_loader.dataset.pruned_env_set) == 0:
                        done = True
                        if not args.to_cache:
                            train_loader.dataset._load_envs(start_idx=0,
                                                            in_order=True)
                else:
                    done = True

        epoch += 1
Exemplo n.º 18
0
    shared_nav_model.share_memory()

    print('Loading navigation params from checkpoint: %s' %
          args.nav_checkpoint_path)
    shared_nav_model.load_state_dict(checkpoint['state'])

    # Load answering model
    print('Loading answering checkpoint from %s' % args.ans_checkpoint_path)
    ans_checkpoint = torch.load(args.ans_checkpoint_path,
                                map_location={'cuda:0': 'cpu'})

    ans_model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    shared_ans_model = VqaLstmCnnAttentionModel(**ans_model_kwargs)

    shared_ans_model.share_memory()

    print('Loading params from checkpoint: %s' % args.ans_checkpoint_path)
    shared_ans_model.load_state_dict(ans_checkpoint['state'])

    if args.mode == 'eval':

        eval(0, args, shared_nav_model, shared_ans_model)

    elif args.mode == 'train':

        train(0, args, shared_nav_model, shared_ans_model)

    else: