Example #1
def finetune():
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)

    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    print("The finetune data_size is : %d\n" % train_env.size())
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }

    train(train_env, tok, args.iters, val_envs=val_envs)
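
The val_envs construction above is the pattern most of these examples share: map each split name to an (R2RBatch, Evaluation) pair. A minimal, self-contained sketch of the pattern, with stand-in classes instead of the real R2RBatch/Evaluation so it runs without the repo or image features:

class StubEnv:
    def __init__(self, split):
        self.split = split

class StubEvaluation:
    def __init__(self, splits):
        self.splits = splits

def make_val_envs(splits):
    # One (environment, evaluator) pair per validation split.
    return {split: (StubEnv(split), StubEvaluation([split])) for split in splits}

val_envs = make_val_envs(['train', 'val_seen', 'val_unseen'])
for split, (env, evaluator) in val_envs.items():
    print(split, env.split, evaluator.splits)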
Example #2
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        batch_size=BATCH_SIZE):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=batch_size,
                         splits=train_splits, tokenizer=tok)

    enc_hidden_size = hidden_size//2 if args.bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE
    encoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
        dropout_ratio, bidirectional=args.bidirectional, glove=glove))
    decoder = try_cuda(AttnDecoderLSTM(
        action_embedding_size, hidden_size, dropout_ratio,
        feature_size=feature_size))
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=batch_size,
                         splits=[split], tokenizer=tok),
                eval.Evaluation([split]))
        for split in test_splits}

    return train_env, test_envs, encoder, decoder
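
Example #2 halves the encoder hidden size when the LSTM is bidirectional, so that the concatenated forward/backward outputs again have hidden_size features for the decoder. A small, self-contained check of that sizing rule (the input size and dimensions below are illustrative, not taken from the repo):

import torch
import torch.nn as nn

hidden_size = 512
bidirectional = True
enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size

# A bidirectional LSTM emits 2 * enc_hidden_size features per step, which
# matches hidden_size again once the per-direction size is halved.
lstm = nn.LSTM(input_size=256, hidden_size=enc_hidden_size,
               batch_first=True, bidirectional=bidirectional)
out, _ = lstm(torch.randn(4, 10, 256))
assert out.shape[-1] == hidden_size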
Example #3
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        test_instruction_limit=None):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=batch_size,
                         splits=train_splits, tokenizer=tok)

    enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE
    encoder = try_cuda(SpeakerEncoderLSTM(
        action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
        bidirectional=bidirectional))
    decoder = try_cuda(SpeakerDecoderLSTM(
        len(vocab), word_embedding_size, hidden_size, dropout_ratio,
        glove=glove))

    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=batch_size,
                         splits=[split], tokenizer=tok,
                         instruction_limit=test_instruction_limit),
                eval_speaker.SpeakerEvaluation(
                    [split], instructions_per_path=test_instruction_limit))
        for split in test_splits}

    return train_env, test_envs, encoder, decoder
Example #4
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok), Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
Example #5
def make_env_and_models(args, train_vocab_path, train_splits, test_splits):
    setup(args.seed)
    image_features_list = ImageFeatures.from_args(args)
    if args.job is None:  # create vocab only during training (job is None)
        vocab = build_vocab(train_splits)
        write_vocab(vocab, TRAIN_VOCAB)

    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list,
                         batch_size=args.batch_size,
                         splits=train_splits,
                         tokenizer=tok) if len(train_splits) > 0 else None
    test_envs = {
        split: (R2RBatch(image_features_list,
                         batch_size=args.batch_size,
                         splits=[split],
                         tokenizer=tok), Evaluation(split, args.instrType))
        for split in test_splits
    }

    agent = make_follower(args, vocab)
    agent.env = train_env

    if args.useObjLabelOrVis in ['label', 'both']:
        if train_env is not None:
            agent.pointer.wtoi = train_env.wtoi
        else:
            agent.pointer.wtoi = test_envs[test_splits[0]][0].wtoi

    return train_env, test_envs, agent
Example #6
def test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
    ''' Train on combined training and validation sets, and generate test submission. '''
  
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)
    
    # Build models and train
    enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx, 
                  dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()

    train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix)

    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'], tokenizer=tok,
                        path_type=path_type, history=history, blind=blind)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, 'test', 5000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
Example #7
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback,
              n_iters, prefix, blind, debug, train_vocab, trainval_vocab,
              batch_size, action_embedding_size, target_embedding_size,
              bidirectional, dropout_ratio, weight_decay, feature_size,
              hidden_size, word_embedding_size, lr, result_dir, snapshot_dir,
              plot_dir, train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''

    setup(seed, train_vocab, trainval_vocab)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab if eval_type == 'val' else trainval_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=train_splits,
                         tokenizer=tok,
                         seed=seed,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         blind=blind), Evaluation([split], seed=seed))
        for split in test_splits
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()

    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          feedback,
          max_episode_len,
          max_input_length,
          prefix,
          blind,
          lr,
          weight_decay,
          result_dir,
          snapshot_dir,
          plot_dir,
          val_envs=val_envs,
          debug=debug)
Example #8
def train_val_augment():
    """
    Train the listener with the augmented data
    """
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    # Load the env img features
    feat_dict = read_img_features(features)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    # Load the augmentation data
    aug_path = args.aug

    # Create the training environment
    aug_env = R2RBatch(feat_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok,
                       name='aug')

    # import sys
    # sys.exit()
    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)

    # Printing out the statistics of the dataset
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    # Setup the validation data
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }

    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
Example #9
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    train_env = R2RBatch(feat_dict,
                         pano_caffe,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    if args.submit:
        val_env_names.append('test')

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              pano_caffe,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example #10
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        #val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split], tokenizer=tok),
           Evaluation([split], featurized_scans, tok))
          )
         for split in val_env_names
         )
    )

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'vae_agent':
        train_vae_agent(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(train_env, tok, val_envs)
    elif args.train == 'inferspeaker':
        unseen_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['tasks/R2R/data/aug_paths_test.json'], tokenizer=None)
        infer_speaker(unseen_env, tok)
    else:
        assert False
Example #11
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
               feedback_method, n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind), Evaluation([split],
                                                  path_type=path_type))
        for split in ['test']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env,
          encoder,
          decoder,
          n_iters,
          path_type,
          history,
          feedback_method,
          max_episode_len,
          MAX_INPUT_LENGTH,
          model_prefix,
          val_envs=val_envs)
Example #12
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup(seed)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind), Evaluation([split], seed=seed))
        for split in ['val_seen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()
    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          history,
          feedback_method,
          max_episode_len,
          max_input_length,
          model_prefix,
          val_envs=val_envs,
          debug=debug)
Example #13
def eval_simple_agents():
    """ Run simple baselines on each split. """
    for split in ["train", "val_seen", "val_unseen"]:
        env = R2RBatch(
            Feature(None, False),
            False,
            False,
            6,
            False,
            "lstm",
            batch_size=1,
            splits=[split],
            tokenizer=None,
        )
        ev = Evaluation([split], encoder_type="lstm")  #  subgoal=False)

        for agent_type in ["Stop", "Shortest", "Random"]:
            outfile = "%s%s_%s_agent.json" % (RESULT_DIR, split,
                                              agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _ = ev.score(outfile)
            print("\n%s" % agent_type)
            pp.pprint(score_summary)
Example #14
def make_more_train_env(args, train_vocab_path, train_splits):
    setup(args.seed)
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=train_splits, tokenizer=tok)
    return train_env
Example #15
def make_env_and_models(args, train_vocab_path, train_splits, test_splits):
    setup(args.seed)
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=train_splits, tokenizer=tok) if len(train_splits) > 0 else None
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=[split], tokenizer=tok),
                eval.Evaluation([split]))
        for split in test_splits}

    agent = make_follower(args, vocab)
    agent.env = train_env

    return train_env, test_envs, agent
Example #16
def hard_negative():
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)

    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)

    gt_train_env, gt_val_seen_env, gt_val_unseen_env = gt_envs = list(
        R2RBatch(feat_dict,
                 candidate_dict,
                 batch_size=args.batchSize,
                 splits=[split],
                 tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen'])
    neg_train_env, neg_val_seen_env, neg_val_unseen_env = neg_envs = list(
        R2RBatch(feat_dict,
                 candidate_dict,
                 batch_size=args.batchSize,
                 splits=[split + "_instneg", split + "_pathneg"],
                 tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen'])
    arbiter_train_env, arbiter_val_seen_env, arbiter_val_unseen_env = (
        ArbiterBatch(gt_env,
                     neg_env,
                     args.batchSize // 2,
                     args.batchSize // 2,
                     feat_dict,
                     candidate_dict,
                     batch_size=args.batchSize,
                     splits=[],
                     tokenizer=tok)
        for gt_env, neg_env in zip(gt_envs, neg_envs))
    train_arbiter(arbiter_train_env,
                  tok,
                  args.iters,
                  val_envs={
                      'train': arbiter_train_env,
                      'val_seen': arbiter_val_seen_env,
                      'val_unseen': arbiter_val_unseen_env,
                  })
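
Example #16 appears to mix each arbiter batch from two sources, drawing args.batchSize // 2 items from the ground-truth environment and the same number from the negative environment. A self-contained sketch of that half-and-half batching idea, with plain lists standing in for the two environments:

import random

def mixed_batch(gt_pool, neg_pool, batch_size):
    # Half of each batch comes from ground-truth data, half from negatives.
    half = batch_size // 2
    batch = random.sample(gt_pool, half) + random.sample(neg_pool, half)
    random.shuffle(batch)
    return batch

gt_pool = [('gt', i) for i in range(100)]
neg_pool = [('neg', i) for i in range(100)]
print(mixed_batch(gt_pool, neg_pool, 8))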
Example #17
def train_val_augment(test_only=False):
    """
    Train the listener with the augmented data
    """
    setup()

    # Create a batch training environment that will also preprocess text
    tok_bert = get_tokenizer(args)

    # Load the env img features
    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    # Load the augmentation data
    aug_path = args.aug
    # Create the training environment
    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok_bert)
    aug_env = R2RBatch(feat_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok_bert,
                       name='aug')

    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=[split],
                         tokenizer=tok_bert),
                Evaluation([split], featurized_scans, tok_bert))
        for split in val_env_names
    }

    # Start training
    train(train_env, tok_bert, args.iters, val_envs=val_envs, aug_env=aug_env)
Example #18
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)

    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict
    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              candidate_dict,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in ['val_seen', 'val_unseen', 'train']))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example #19
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    tok = get_tokenizer(args)

    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    if args.submit:
        val_env_names.append('test')
    else:
        pass

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, val_envs=val_envs)
    else:
        assert False
Example #20
def eval_simple_agents():
    ''' Run simple baselines on each split. '''
    for split in ['train', 'val_seen', 'val_unseen']:
        env = R2RBatch(None, batch_size=1, splits=[split])
        ev = Evaluation([split])

        for agent_type in ['Stop', 'Shortest', 'Random']:
            outfile = '%s%s_%s_agent.json' % (RESULT_DIR, split, agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _ = ev.score(outfile)
            print('\n%s' % agent_type)
            pp.pprint(score_summary)
Example #21
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok), Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    train(train_env, len(vocab), n_iters, val_envs=val_envs)
Example #22
def train_vae():
    """Train vae for sub-policy(z->policy)"""
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    # Create a batch training environment that will also preprocess text
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['sub_train'],tokenizer=tok)
    writer = SummaryWriter(logdir=log_dir)

    obs_dim = train_env.feature_size+args.angle_feat_size
    # TODO: latent_dim ablation
    path_len = 2 # fix path_len = 2, total path_len = 6
    vae = BaseVAE(train_env, tok, obs_dim, args.vae_latent_dim).cuda()
    vae.train()
Example #23
    def go(self):
        self.envs = {}
        for key in self.env_args:
            # print('env', key)
            feature_store, data, scans, bs = self.env_args[key]
            env = R2RBatch(feature_store,
                           bs,
                           splits=None,
                           tokenizer=self.master_model.tok,
                           name='sub_train',
                           record_scans=scans)

            env.data = data
            self.envs[key] = env
            k = key

        while True:
            _ = self.sync_Q.get()
            self.model = agent_v6.SSM(self.envs[k],
                                      self.master_model.results_path,
                                      self.master_model.tok,
                                      self.master_model.episode_len,
                                      self.master_model.max_node,
                                      self.master_model.args)
            self._sync_local_with_global()

            for model in self.model.models:
                model.eval()

            for name in self.envs:
                # print('doing', name)
                iters = None if name != 'train' else 20
                # iters = 1
                self.model.env = self.envs[name]
                self.model.test(use_dropout=False,
                                feedback='argmax',
                                iters=iters)

                res = self.model.get_results()
                self.res_Q.put((name, res))

            del self.model
            self.model = None
            torch.cuda.empty_cache()
Example #24
def eval_simple_agents(args):
    ''' Run simple baselines on each split. '''
    img_features = ImageFeatures.from_args(args)
    for split in ['train', 'val_seen', 'val_unseen', 'test']:
        env = R2RBatch(img_features,
                       batch_size=1,
                       splits=[split],
                       prefix=args.prefix)
        ev = Evaluation([split])

        for agent_type in ['Stop', 'Shortest', 'Random']:
            outfile = '%s%s_%s_agent.json' % (train.RESULT_DIR, split,
                                              agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _, _ = ev.score_file(outfile)
            print('\n%s' % agent_type)
            pp.pprint(score_summary)
Example #25
def test_submission():
    ''' Train on combined training and validation sets, and generate test submission. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    # train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    # train(train_env, encoder, decoder, n_iters)

    encoder.load_state_dict(torch.load('%s/seq2seq_enc.pt' % (SNAPSHOT_DIR)))
    decoder.load_state_dict(torch.load('%s/seq2seq_dec.pt' % (SNAPSHOT_DIR)))

    # Generate test submission
    test_env = R2RBatch(features,
                        batch_size=batch_size,
                        splits=['test1'],
                        tokenizer=tok)

    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, 'seq2seq',
                                                   'test1', 20000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
Example #26
def train():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())

    visible_gpu = "0,1,2,3"  # avaiable GPUs, GPU0 is for processing gradient accumulating
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.4
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'rms'
    args.lr = 1e-4
    args.iters = 80000
    args.maxAction = 15
    args.batchSize = 16
    args.aug = 'tasks/R2R/data/aug_paths.json'
    args.self_train = True

    args.featdropout = 0.4
    args.iters = 200000

    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD

    log_dir = 'snap/%s' % args.name
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'

    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'

    if args.features == 'imagenet':
        features = IMAGENET_FEATURES

    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext

    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    #
    setup()

    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    # Create the training environment
    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    aug_env = R2RBatch(feat_dict,
                       batch_size=args.batchSize,
                       splits=[args.aug],
                       tokenizer=tok)

    train_env = {'train': train_env, 'aug': aug_env}

    load_path = None

    torch.autograd.set_detect_anomaly(True)
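    # Anomaly detection makes autograd pinpoint the op that produced NaN/Inf
    # gradients; it adds significant overhead, so it is mainly a debugging aid.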

    learner = Learner(train_env,
                      "",
                      tok,
                      args.maxAction,
                      process_num=4,
                      max_node=17,
                      visible_gpu=visible_gpu)

    if load_path is not None:
        print('load checkpoint from:', load_path)
        learner.load(load_path)

    learner.train()
Example #27
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open(
                '/egr/research-hlr/joslin/Matterdata/v1/scans/img_features/pano_object_class.pkl',
                'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    train_env = R2RBatch(feat_dict,
                         pano_caffe,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # if you want to test "train", just uncomment this
        #val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              pano_caffe,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    # import sys
    # sys.exit()
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example #28
def test():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())

    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.3
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 80000
    args.maxAction = 35
    args.batchSize = 24
    args.target_batch_size = 24

    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'

    args.speaker = 'snap/speaker/state_dict/best_val_unseen_bleu'

    args.featdropout = 0.4
    args.iters = 200000

    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD

    log_dir = 'snap/%s' % args.name
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logdir = '%s/eval' % log_dir
    writer = SummaryWriter(logdir=logdir)

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'

    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'

    if args.features == 'imagenet':
        features = IMAGENET_FEATURES

    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext

    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    #
    setup()

    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    print('start extract keys...')
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')

    val_envs = {
        split: R2RBatch(feat_dict,
                        batch_size=args.batchSize,
                        splits=[split],
                        tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    learner = Learner(val_envs,
                      "",
                      tok,
                      args.maxAction,
                      process_num=2,
                      visible_gpu=visible_gpu)
    learner.eval_init()

    for i in range(0, 10000):
        ckpt = '%s/state_dict/Iter_%06d' % (log_dir, (i + 1) * 100)
        while not os.path.exists(ckpt):
            time.sleep(10)

        time.sleep(10)

        learner.load_eval(ckpt)

        results = learner.eval()
        loss_str = ''
        for key in results:
            evaluator = evaluators[key]
            result = results[key]

            score_summary, _ = evaluator.score(result)

            loss_str += ", %s \n" % key

            for metric, val in score_summary.items():
                loss_str += ', %s: %.3f' % (metric, val)
                writer.add_scalar('%s/%s' % (metric, key), val, (i + 1) * 100)

            loss_str += '\n'

        print(loss_str)
Example #29
    'prog_monitor': True,
    'dev_monitor': False,
    'attn_only_verb': False,
    'soft_align': False,
    'scorer': None,
    'load_follower':
    'tasks/R2R/experiments/pretrain_cgPm_pertraj/snapshots/follower_cg_pm_sample2step_imagenet_mean_pooled_1heads_train_iter_1900_val_unseen-success_rate=0.478',
    'language': 'en-OLD',
    'prefix': 'R2R',
})

image_features_list = ImageFeatures.from_args(args)
vocab = read_vocab(TRAIN_VOCAB, args.language)
tok = Tokenizer(vocab)
env = R2RBatch(image_features_list,
               batch_size=256,
               splits=['train', 'val_seen', 'val_unseen'],
               tokenizer=tok)
env.batch = env.data

from eval import Evaluation

test_envs = {
    split: (R2RBatch(image_features_list,
                     batch_size=64,
                     splits=[split],
                     tokenizer=tok), Evaluation([split]))
    for split in ['val_unseen']
}

agent = make_follower(args, vocab)
Example #30
def make_env_and_models(args,
                        train_vocab_path,
                        train_splits,
                        test_splits,
                        test_instruction_limit=None):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list,
                         batch_size=batch_size,
                         splits=train_splits,
                         tokenizer=tok)

    train_env.data.extend(hardNeg_train)  # extend train data and shuffle
    random.shuffle(train_env.data)

    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE

    # =============================================================================
    #     visEncoder = try_cuda(CompatVisEncoderLSTM(
    #         action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
    #         bidirectional=bidirectional))
    # =============================================================================
    visEncoder = try_cuda(
        SpeakerEncoderLSTM(action_embedding_size,
                           feature_size,
                           enc_hidden_size,
                           dropout_ratio,
                           bidirectional=bidirectional))
    # =============================================================================
    #     lanEncoder = try_cuda(CompatLanEncoderLSTM(
    #         len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
    #         dropout_ratio, bidirectional=True, glove=glove))
    # =============================================================================
    lanEncoder = try_cuda(
        EncoderLSTM(len(vocab),
                    word_embedding_size,
                    enc_hidden_size,
                    vocab_pad_idx,
                    dropout_ratio,
                    bidirectional=False,
                    glove=glove))
    dotSim = try_cuda(dotSimilarity(batch_size, enc_hidden_size))
    #visEncoder.load_state_dict(torch.load('tasks/R2R/snapshots/release/speaker_final_release_enc'))
    #lanEncoder.load_state_dict(torch.load('tasks/R2R/snapshots/release/follower_final_release_enc'))

    test_envs = {
        split: (R2RBatch(image_features_list,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         instruction_limit=test_instruction_limit),
                eval_speaker.SpeakerEvaluation(
                    [split], instructions_per_path=test_instruction_limit))
        for split in test_splits
    }

    #test_envs['val_seen'][0].data.extend(hardNeg_val_seen)
    test_envs['val_unseen'][0].data.extend(hardNeg_val_unseen)
    test_envs['val_unseen'][0].data = test_envs['val_unseen'][0].data[
        3000:4000]
    return train_env, test_envs, visEncoder, lanEncoder, dotSim