# Note: if you're doing the stanford setup, you'll need to change this to freeze the lower layers

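# the learning rate below is scaled linearly with the effective batch size
# (num_gpus * batch_size), the usual linear-scaling rule for SGD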
optimizer = optim.SGD([p for p in detector.parameters() if p.requires_grad],
                      weight_decay=conf.l2,
                      lr=conf.lr * conf.num_gpus * conf.batch_size,
                      momentum=0.9)

criterion = nn.CrossEntropyLoss()

for name in detector.state_dict():
    print(name)

start_epoch = -1
if conf.ckpt is not None:
    ckpt = torch.load(conf.ckpt)
    if optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = ckpt['epoch']

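# optimistic_restore (a helper in the neural-motifs codebase) copies over only
# the saved parameters whose names and shapes match the model, and reports
# whether everything matched. A minimal sketch of that behavior, with a
# hypothetical name so it doesn't shadow the real helper:
def _optimistic_restore_sketch(model, state_dict):
    own = model.state_dict()
    matched = {k: v for k, v in state_dict.items()
               if k in own and own[k].shape == v.shape}
    own.update(matched)
    model.load_state_dict(own)
    return len(matched) == len(own)  # False if any model parameter was skipped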

def train_epoch(epoch_num):
    detector.train()
    tr = []
    start = time.time()
    for b, batch in tqdm(enumerate(train_loader)):
        tr.append(train_batch(batch))

        if b % conf.print_interval == 0 and b >= conf.print_interval:
            mn = pd.concat(tr[-conf.print_interval:], axis=1).mean(1)
            time_per_batch = (time.time() - start) / conf.print_interval
            print("\ne{:2d}b{:5d}/{:5d} {:.3f}s/batch, {:.1f}m/epoch".format(
                epoch_num, b, len(train_loader), time_per_batch,
Example 2
# lang_stats = language_eval(json.load(open(conf.save_dir+'_print_predictions.json')), test.coco_ids[:2000], conf.save_dir+'_cache.json')


### load the caption generator ##################################################
captionGenerator = GCNLSTMModel(train.ix_to_word, train.vocab_size, input_encoding_size=300, Dconv=4096,
                                num_predicate=len(train.ind_to_predicates), rnn_type='lstm',
                                rnn_size=512, num_layers=1, drop_prob_lm=0.5, seq_length=16, seq_per_img=5,
                                att_feat_size=512,
                                num_relation=conf.num_relation, freq_bl=conf.freq_bl)
captionGenerator.cuda()
print(print_para(captionGenerator), flush=True)

if conf.caption_ckpt is not None:
    caption_ckpt = torch.load(conf.caption_ckpt)
    start_epoch = caption_ckpt['epoch']
    if not optimistic_restore(captionGenerator, caption_ckpt['state_dict']):
        start_epoch = -1
else:
    start_epoch = -1

###### now load the relation detector and set it to test mode!!! ###################################
detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                    use_resnet=conf.use_resnet, order=conf.order, pick_parent=conf.pick_parent,
                    nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
                    use_proposals=conf.use_proposals,
                    pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
                    pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
                    pooling_dim=conf.pooling_dim,
                    rec_dropout=conf.rec_dropout,
                    use_bias=conf.use_bias,
                    use_tanh=conf.use_tanh,
                    limit_vision=conf.limit_vision)
detector.eval()  # test mode, per the section comment above
detector.cuda()
Example 3
def main():
    fname = os.path.join(conf.save_dir, 'train_losses.csv')
    train_f = open(fname, 'w')
    train_f.write(
        'iter,class_loss,rel_loss,total,recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
    )
    train_f.flush()

    fname = os.path.join(conf.save_dir, 'val_losses.csv')
    val_f = open(fname, 'w')
    val_f.write(
        'recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
    )
    val_f.flush()

    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_duplicate_rels=True,
                              use_proposals=conf.use_proposals,
                              filter_non_overlap=conf.mode == 'sgdet')
    train_loader, val_loader = VGDataLoader.splits(
        train,
        val,
        mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)

    detector = RelModel(
        classes=train.ind_to_classes,
        rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus,
        mode=conf.mode,
        require_overlap_det=True,
        use_resnet=conf.use_resnet,
        order=conf.order,
        nl_edge=conf.nl_edge,
        nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision,
        lml_topk=conf.lml_topk,
        lml_softmax=conf.lml_softmax,
        entr_topk=conf.entr_topk,
        ml_loss=conf.ml_loss)

    # Freeze the detector
    for n, param in detector.detector.named_parameters():
        param.requires_grad = False

    print(print_para(detector), flush=True)

    ckpt = torch.load(conf.ckpt)
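    # paths like 'checkpoints/vgrel-29.tar' reduce to 'vgrel' below and are
    # treated as full relation-model checkpoints; anything else (e.g. a vgdet
    # checkpoint) only seeds the object detector and ROI heads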
    if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
        print("Loading EVERYTHING")
        start_epoch = ckpt['epoch']

        if not optimistic_restore(detector, ckpt['state_dict']):
            start_epoch = -1
            # optimistic_restore(
            #     detector.detector,
            #     torch.load('checkpoints/vgdet/vg-28.tar')['state_dict']
            # )
    else:
        start_epoch = -1
        optimistic_restore(detector.detector, ckpt['state_dict'])

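        # roi_fmap indices 0 and 3 are the two Linear layers of the VGG16 fc
        # stack; copy their pretrained weights into both the edge ROI head
        # (roi_fmap) and the object ROI head (roi_fmap_obj)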
        detector.roi_fmap[1][0].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap[1][3].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap[1][0].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap[1][3].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.3.bias'])

        detector.roi_fmap_obj[0].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap_obj[3].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap_obj[0].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap_obj[3].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.3.bias'])

    detector.cuda()

    print("Training starts now!")
    optimizer, scheduler = get_optim(detector,
                                     conf.lr * conf.num_gpus * conf.batch_size)
    best_eval = None
    for epoch in range(start_epoch + 1, start_epoch + 1 + conf.num_epochs):
        rez = train_epoch(epoch, detector, train, train_loader, optimizer,
                          conf, train_f)
        print("overall{:2d}: ({:.3f})\n{}".format(epoch,
                                                  rez.mean(1)['total'],
                                                  rez.mean(1)),
              flush=True)

        mAp = val_epoch(detector, val, val_loader, val_f)
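        # ReduceLROnPlateau is configured in 'max' mode (see get_optim in
        # Example 4), so the LR decays by 10x when validation mAP plateaus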
        scheduler.step(mAp)

        if conf.save_dir is not None:
            if best_eval is None or mAp > best_eval:
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': detector.state_dict(),
                        # 'optimizer': optimizer.state_dict(),
                    },
                    os.path.join(conf.save_dir, 'best-val.tar'))
                best_eval = mAp
Example 4
def get_optim(detector, lr):
    # (the top of this snippet is truncated in the listing; the lines above the
    # scheduler tail are reconstructed from Example 1's SGD setup and Example
    # 6's fuller get_optim, so treat them as a sketch, not the original code)
    params = [p for p in detector.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, weight_decay=conf.l2, lr=lr, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  patience=3,
                                  factor=0.1,
                                  verbose=True,
                                  threshold=0.0001,
                                  threshold_mode='abs',
                                  cooldown=1)
    return optimizer, scheduler


# ii_rel = FocalLoss(train.num_predicates, PREDICATES_WEIGHTS, CURRICULUM, size_average=False)
# ii_obj = FocalLoss(train.num_classes, size_average=True)

ckpt = torch.load(conf.ckpt)
if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
    logger.info("Loading EVERYTHING")
    start_epoch = ckpt['epoch']

    if not optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = -1
        # optimistic_restore(detector.detector, torch.load('checkpoints/vgdet/vg-28.tar')['state_dict'])
else:
    start_epoch = -1
    optimistic_restore(detector.detector, ckpt['state_dict'])

    detector.roi_fmap[1][0].weight.data.copy_(
        ckpt['state_dict']['roi_fmap.0.weight'])
    detector.roi_fmap[1][3].weight.data.copy_(
        ckpt['state_dict']['roi_fmap.3.weight'])
    detector.roi_fmap[1][0].bias.data.copy_(
        ckpt['state_dict']['roi_fmap.0.bias'])
    detector.roi_fmap[1][3].bias.data.copy_(
        ckpt['state_dict']['roi_fmap.3.bias'])
Example 5
    def __init__(self, args, ind_to_classes):
        super(neural_motifs_sg2im_model, self).__init__()
        self.args = args

        # define and initialize the detector
        self.detector = ObjectDetector(
            classes=ind_to_classes,
            num_gpus=args.num_gpus,
            mode='refinerels' if not args.use_proposals else 'proposals',
            use_resnet=args.use_resnet)
        if args.ckpt is not None:
            ckpt = torch.load(args.ckpt)
            optimistic_restore(self.detector, ckpt['state_dict'])
        self.detector.eval()
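        # the detector stays frozen in eval mode: it supplies boxes and
        # features while only the generator and discriminators train below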

        # define and initialize the generator, image discriminator, object
        # discriminator, and their corresponding optimizers
        vocab = {
            'object_idx_to_name': ind_to_classes,
        }
        self.model, model_kwargs = build_model(args)

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.learning_rate)

        self.obj_discriminator, d_obj_kwargs = build_obj_discriminator(
            args, vocab)
        self.img_discriminator, d_img_kwargs = build_img_discriminator(args)

        if self.obj_discriminator is not None:
            self.obj_discriminator.train()
            self.optimizer_d_obj = torch.optim.Adam(
                self.obj_discriminator.parameters(), lr=args.learning_rate)

        if self.img_discriminator is not None:
            self.img_discriminator.train()
            self.optimizer_d_img = torch.optim.Adam(
                self.img_discriminator.parameters(), lr=args.learning_rate)

        restore_path = None
        if args.restore_from_checkpoint:
            restore_path = '%s_with_model.pt' % args.checkpoint_name
            restore_path = os.path.join(args.output_dir, restore_path)
        if restore_path is not None and os.path.isfile(restore_path):
            print('Restoring from checkpoint:')
            print(restore_path)
            checkpoint = torch.load(restore_path)
            self.model.load_state_dict(checkpoint['model_state'])
            self.optimizer.load_state_dict(checkpoint['optim_state'])

            if self.obj_discriminator is not None:
                self.obj_discriminator.load_state_dict(
                    checkpoint['d_obj_state'])
                self.optimizer_d_obj.load_state_dict(
                    checkpoint['d_obj_optim_state'])

            if self.img_discriminator is not None:
                self.img_discriminator.load_state_dict(
                    checkpoint['d_img_state'])
                self.optimizer_d_img.load_state_dict(
                    checkpoint['d_img_optim_state'])

            t = checkpoint['counters']['t']
            if 0 <= args.eval_mode_after <= t:
                self.model.eval()
            else:
                self.model.train()
            epoch = checkpoint['counters']['epoch']
        else:
            t, epoch = 0, 0
            checkpoint = {
                'vocab': vocab,
                'model_kwargs': model_kwargs,
                'd_obj_kwargs': d_obj_kwargs,
                'd_img_kwargs': d_img_kwargs,
                'losses_ts': [],
                'losses': defaultdict(list),
                'd_losses': defaultdict(list),
                'checkpoint_ts': [],
                'train_batch_data': [],
                'train_samples': [],
                'train_iou': [],
                'val_batch_data': [],
                'val_samples': [],
                'val_losses': defaultdict(list),
                'val_iou': [],
                'norm_d': [],
                'norm_g': [],
                'counters': {
                    't': None,
                    'epoch': None,
                },
                'model_state': None,
                'model_best_state': None,
                'optim_state': None,
                'd_obj_state': None,
                'd_obj_best_state': None,
                'd_obj_optim_state': None,
                'd_img_state': None,
                'd_img_best_state': None,
                'd_img_optim_state': None,
                'best_t': [],
            }

        self.t, self.epoch, self.checkpoint = t, epoch, checkpoint
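
    # a sketch of the matching save step implied by the restore logic above;
    # the method name is ours, but the keys follow the checkpoint dict built here
    def _save_checkpoint_sketch(self):
        self.checkpoint['counters']['t'] = self.t
        self.checkpoint['counters']['epoch'] = self.epoch
        self.checkpoint['model_state'] = self.model.state_dict()
        self.checkpoint['optim_state'] = self.optimizer.state_dict()
        if self.obj_discriminator is not None:
            self.checkpoint['d_obj_state'] = self.obj_discriminator.state_dict()
            self.checkpoint['d_obj_optim_state'] = self.optimizer_d_obj.state_dict()
        if self.img_discriminator is not None:
            self.checkpoint['d_img_state'] = self.img_discriminator.state_dict()
            self.checkpoint['d_img_optim_state'] = self.optimizer_d_img.state_dict()
        torch.save(self.checkpoint,
                   os.path.join(self.args.output_dir,
                                '%s_with_model.pt' % self.args.checkpoint_name))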
Example 6
def get_optim(detector, lr):
    # (top of the snippet truncated in the listing; reconstructed the same way
    # as in Example 4)
    params = [p for p in detector.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, weight_decay=conf.l2, lr=lr, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  patience=3,
                                  factor=0.1,
                                  verbose=True,
                                  threshold=0.0001,
                                  threshold_mode='abs',
                                  cooldown=1)
    return optimizer, scheduler


ckpt = torch.load(conf.ckpt)
if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
    print("Loading EVERYTHING")
    start_epoch = ckpt['epoch']

    if not optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = -1
        # optimistic_restore(detector.detector, torch.load('checkpoints/vgdet/vg-28.tar')['state_dict'])
else:
    start_epoch = -1
    optimistic_restore(detector.detector, ckpt['state_dict'])  # restore the object detector's parameters

    # two Linear layers, taken from the pretrained VGG16
    detector.roi_fmap[1][0].weight.data.copy_(
        ckpt['state_dict']['roi_fmap.0.weight'])
    detector.roi_fmap[1][3].weight.data.copy_(
        ckpt['state_dict']['roi_fmap.3.weight'])
    detector.roi_fmap[1][0].bias.data.copy_(
        ckpt['state_dict']['roi_fmap.0.bias'])
    detector.roi_fmap[1][3].bias.data.copy_(
        ckpt['state_dict']['roi_fmap.3.bias'])
Example 7

# (this example's header is missing from the listing and its code begins
# mid-call; the detector construction below is reconstructed by analogy with
# the classifier that follows, and the mode argument is an assumption)
detector = ObjectDetector(classes=train.ind_to_classes,
                          num_gpus=conf.num_gpus,
                          mode=conf.mode,
                          use_resnet=conf.use_resnet,
                          nms_filter_duplicates=True,
                          thresh=0.01)
detector.eval()
detector.cuda()

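# mode='gtbox' runs the recognition head on ground-truth boxes, so this second
# ObjectDetector classifies known regions rather than detecting new ones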
classifier = ObjectDetector(classes=train.ind_to_classes,
                            num_gpus=conf.num_gpus,
                            mode='gtbox',
                            use_resnet=conf.use_resnet,
                            nms_filter_duplicates=True,
                            thresh=0.01)
classifier.eval()
classifier.cuda()

ckpt = torch.load(conf.ckpt)
mismatch = optimistic_restore(detector, ckpt['state_dict'])
mismatch = optimistic_restore(classifier, ckpt['state_dict'])

MOST_COMMON_MODE = True

if MOST_COMMON_MODE:
    prob_matrix = fg_matrix.astype(np.float32)
    prob_matrix[:, :, 0] = bg_matrix

    # TRYING SOMETHING NEW.
    prob_matrix[:, :, 0] += 1
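    # the +1 is add-one smoothing on the BG count: every (subject, object)
    # slice then sums to at least 1, so the normalization below never divides
    # by zero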
    prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
    # prob_matrix /= float(fg_matrix.max())

    np.save(os.path.join(DATA_PATH, 'pred_stats.npy'), prob_matrix)
    prob_matrix[:, :, 0] = 0  # Zero out BG
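
    # with BG zeroed, the most-frequent-predicate baseline for a class pair is
    # an argmax over the predicate axis; a minimal sketch (the function name is
    # ours, not from the original script):
    def most_common_predicate(subj_class, obj_class):
        return prob_matrix[subj_class, obj_class].argmax()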
Example 8
def main():
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(
        num_val_im=conf.val_size, filter_duplicate_rels=True,
        use_proposals=conf.use_proposals,
        filter_non_overlap=conf.mode == 'sgdet',
    )
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel', batch_size=conf.batch_size,
        num_workers=conf.num_workers, num_gpus=conf.num_gpus
    )

    detector = RelModel(
        classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
        use_resnet=conf.use_resnet, order=conf.order,
        nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision
    )

    detector.cuda()
    ckpt = torch.load(conf.ckpt)

    optimistic_restore(detector, ckpt['state_dict'])

    evaluator = BasicSceneGraphEvaluator.all_modes(
        multiple_preds=conf.multi_pred)

    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))

    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'

    for idx in selected_idxs:
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]

        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res
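        # rescale predicted boxes from the network input resolution (IM_SCALE)
        # back to the scale the VG annotations are stored at (BOX_SCALE)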
        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }

        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res,
                        unique_cnames, save_dir,
                        multi_pred=conf.multi_pred, n_pred=20)