def graph_construction(object_rois, gt_rois=None):
    # Build the object/phrase/region graph on the CPU (numpy), then move
    # the resulting ROIs back onto the GPU as torch tensors.
    if isinstance(object_rois, torch.Tensor):
        object_rois = object_rois.cpu().numpy()
    object_rois, region_rois, mat_object, mat_phrase, mat_region = graph_construction_py(
        object_rois, gt_rois)
    object_rois = network.np_to_variable(object_rois, is_cuda=True)
    region_rois = network.np_to_variable(region_rois, is_cuda=True)

    return object_rois, region_rois, mat_object, mat_phrase, mat_region
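This helper (and the ones below) lean on network.np_to_variable to move numpy results back onto the GPU. A minimal sketch of what that helper typically looks like in these Faster R-CNN-style codebases (the exact repo implementation may differ):

import torch

def np_to_variable(x, is_cuda=True, dtype=torch.FloatTensor):
    # wrap a numpy array as a torch tensor of the requested dtype,
    # optionally moving it onto the GPU
    v = torch.from_numpy(x).type(dtype)
    if is_cuda:
        v = v.cuda()
    return v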
Example #2
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                   _feat_stride, opts, anchor_scales, anchor_ratios, mappings):
    # Run the numpy-side proposal generation on CPU copies of the RPN
    # outputs, then return the proposals as a CUDA tensor, one row each.
    rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy()
    rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy()
    x = proposal_layer_py(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                          _feat_stride, opts, anchor_scales, anchor_ratios,
                          mappings)
    x = network.np_to_variable(x, is_cuda=True)
    return x.view(-1, 6)
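Both wrappers above follow the same round trip: detach to CPU/numpy, run the numpy-side logic, rewrap the result as a CUDA tensor. A hypothetical sketch of that pattern factored into a decorator (numpy_bridge is not part of the original code, just an illustration of the design choice):

import numpy as np
import torch

def numpy_bridge(fn):
    # hypothetical helper, not in the original repo: run numpy-side logic
    # on a detached CPU copy, then rewrap the result as a CUDA tensor
    def wrapped(t, *args, **kwargs):
        out = fn(t.detach().cpu().numpy(), *args, **kwargs)
        return torch.from_numpy(np.ascontiguousarray(out)).float().cuda()
    return wrapped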
def proposal_target_layer(object_rois, gt_objects, gt_relationships,
                          n_classes_obj):
    """
    Parameters
    ----------
    object_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    gt_objects: (G_obj, 5) [x1, y1, x2, y2, obj_class] int
    gt_relationships: (G_obj, G_obj) [pred_class] int (-1 for no relationship)
    n_classes_obj: number of object classes

    Returns
    -------
    rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    labels: (1 x H x W x A, 1) {0, 1, ..., n_classes_obj - 1}
    bbox_targets: (1 x H x W x A, K x 4) [dx1, dy1, dx2, dy2]
    bbox_inside_weights: (1 x H x W x A, K x 4) 0/1 masks for the regression loss
    plus the phrase labels, region ROIs, and the object/phrase/region
    connectivity matrices
    """

    object_rois = object_rois.data.cpu().numpy()

    targets_object, targets_phrase, targets_region = proposal_target_layer_py(
        object_rois, gt_objects, gt_relationships, n_classes_obj)
    (object_labels, object_rois, bbox_targets, bbox_inside_weights,
     mat_object, object_fg_duplicate) = targets_object[:6]
    phrase_labels, mat_phrase = targets_phrase[:2]
    region_rois, mat_region = targets_region[:2]

    object_rois = network.np_to_variable(object_rois, is_cuda=True)
    region_rois = network.np_to_variable(region_rois, is_cuda=True)
    object_labels = network.np_to_variable(object_labels,
                                           is_cuda=True,
                                           dtype=torch.LongTensor)
    bbox_targets = network.np_to_variable(bbox_targets, is_cuda=True)
    bbox_inside_weights = network.np_to_variable(bbox_inside_weights,
                                                 is_cuda=True)
    phrase_labels = network.np_to_variable(phrase_labels,
                                           is_cuda=True,
                                           dtype=torch.LongTensor)
    duplicate_labels = network.np_to_variable(object_fg_duplicate,
                                              is_cuda=True)

    return (tuple([object_labels, object_rois, bbox_targets,
                   bbox_inside_weights, duplicate_labels]),
            tuple([phrase_labels]),
            tuple([None, region_rois]),
            mat_object, mat_phrase, mat_region)
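The bbox_inside_weights returned above are 0/1 masks; a common way they gate the box-regression loss is to zero both prediction and target before the smooth-L1 term, so background ROIs contribute no gradient. A sketch of that pattern (not necessarily the repo's exact loss code):

import torch.nn.functional as F

def masked_smooth_l1(bbox_pred, bbox_targets, bbox_inside_weights):
    # rows with zero weights (background ROIs) drop out of the loss
    return F.smooth_l1_loss(bbox_pred * bbox_inside_weights,
                            bbox_targets * bbox_inside_weights)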
Example #4
def train(loader,
          model,
          optimizer,
          exp_logger,
          epoch,
          train_all,
          print_freq=100,
          clip_gradient=True,
          iter_size=1):

    model.train()
    meters = exp_logger.reset_meters('train')
    end = time.time()

    for i, sample in enumerate(
            loader):  # (im_data, im_info, gt_objects, gt_relationships)
        batch_size = len(sample['visual'])
        # measure data loading time
        meters['data_time'].update(time.time() - end, n=batch_size)

        input_visual = [item for item in sample['visual']]
        target_objects = sample['objects']
        target_relations = sample['relations']
        image_info = sample['image_info']
        # RPN targets
        rpn_anchor_targets_obj = [[
            np_to_variable(item[0], is_cuda=False, dtype=torch.LongTensor),
            np_to_variable(item[1], is_cuda=False),
            np_to_variable(item[2], is_cuda=False),
            np_to_variable(item[3], is_cuda=False)
        ] for item in sample['rpn_targets']['object']]

        # compute output
        try:
            raw_losses = model(im_data=input_visual,
                               im_info=image_info,
                               gt_objects=target_objects,
                               gt_relationships=target_relations,
                               rpn_anchor_targets_obj=rpn_anchor_targets_obj)

            # reduce (possibly nested) per-GPU losses down to scalar means
            def merge_losses(losses):
                for key in losses:
                    if isinstance(losses[key], dict) or isinstance(
                            losses[key], list):
                        losses[key] = merge_losses(losses[key])
                    elif key.startswith('loss'):
                        losses[key] = losses[key].mean()
                return losses

            losses = merge_losses(raw_losses)
            if train_all:
                loss = losses['loss'] + losses['rpn']['loss'] * 0.5
            else:
                loss = losses['loss']
            # log the loss and intermediate values
            meters['loss'].update(losses['loss'].cpu().item(), n=batch_size)
            meters['loss_cls_obj'].update(losses['loss_cls_obj'].cpu().item(),
                                          n=batch_size)
            meters['loss_reg_obj'].update(losses['loss_reg_obj'].cpu().item(),
                                          n=batch_size)
            meters['loss_cls_rel'].update(losses['loss_cls_rel'].cpu().item(),
                                          n=batch_size)
            meters['loss_rpn'].update(losses['rpn']['loss'].cpu().item(),
                                      n=batch_size)
            meters['batch_time'].update(time.time() - end, n=batch_size)
            meters['epoch_time'].update(meters['batch_time'].val, n=batch_size)

            # backward pass and update (this variant steps every iteration)
            optimizer.zero_grad()
            loss.backward()
            if clip_gradient:
                network.clip_gradient(model, 10.)
            else:
                network.avg_gradient(model, iter_size)
            optimizer.step()

        except Exception:
            print("Error: [{}]".format(i))
            import pdb
            pdb.set_trace()
        end = time.time()
        # Logging the training loss
        if (i + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}] '
                  'Batch_Time: {batch_time.avg: .3f}\t'
                  'FRCNN Loss: {loss.avg: .4f}\t'
                  'RPN Loss: {rpn_loss.avg: .4f}\t'.format(
                      epoch,
                      i + 1,
                      len(loader),
                      batch_time=meters['batch_time'],
                      loss=meters['loss'],
                      rpn_loss=meters['loss_rpn']))

            print('\t[object] loss_cls_obj: {loss_cls_obj.avg:.4f} '
                  'loss_reg_obj: {loss_reg_obj.avg:.4f} '
                  'loss_cls_rel: {loss_cls_rel.avg:.4f} '.format(
                      loss_cls_obj=meters['loss_cls_obj'],
                      loss_reg_obj=meters['loss_reg_obj'],
                      loss_cls_rel=meters['loss_cls_rel'],
                  ))

    exp_logger.log_meters('train', n=epoch)
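network.clip_gradient(model, 10.) caps the gradient magnitude before the optimizer step. A minimal equivalent using PyTorch's built-in utility, assuming global-norm clipping (which is what these codebases usually do; the repo's version may differ):

import torch

def clip_gradient(model, clip_norm):
    # clip the global L2 norm of all parameter gradients to clip_norm
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)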
Example #5
def train(train_loader, target_net, optimizer, epoch):
    batch_time = network.AverageMeter()
    data_time = network.AverageMeter()
    train_loss = network.AverageMeter()
    train_loss_obj_box = network.AverageMeter()
    train_loss_obj_entropy = network.AverageMeter()

    accuracy_obj = network.AccuracyMeter()

    target_net.train()
    end = time.time()
    for i, sample in enumerate(train_loader):

        # measure the data loading time
        data_time.update(time.time() - end)
        im_data = sample['visual'][0].cuda()
        im_info = sample['image_info']
        gt_objects = sample['objects']
        anchor_targets = [
            np_to_variable(sample['rpn_targets']['object'][0][0],
                           is_cuda=True,
                           dtype=torch.LongTensor),
            np_to_variable(sample['rpn_targets']['object'][0][1],
                           is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][0][2],
                           is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][0][3], is_cuda=True)
        ]
        # Forward pass
        target_net(im_data, im_info, rpn_data=anchor_targets)
        # record loss
        loss = target_net.loss
        # total loss
        train_loss.update(loss.item(), im_data.size(0))
        # object bbox regression loss
        train_loss_obj_box.update(target_net.loss_box.item(), im_data.size(0))
        # object classification loss
        train_loss_obj_entropy.update(target_net.loss_cls.item(),
                                      im_data.size(0))
        # accuracy
        accuracy_obj.update(target_net.tp, target_net.tf, target_net.fg_cnt,
                            target_net.bg_cnt)

        # backward
        optimizer.zero_grad()
        torch.cuda.synchronize()
        loss.backward()
        if not args.disable_clip_gradient:
            network.clip_gradient(target_net, 10.)
        torch.cuda.synchronize()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if (i + 1) % args.log_interval == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t'
                'Batch_Time: {batch_time.avg:.3f}s\t'
                'lr: {lr: f}\t'
                'Loss: {loss.avg:.4f}\n'
                '\t[object]:\t'
                'tp: {accuracy_obj.true_pos:.3f}, \t'
                'tf: {accuracy_obj.true_neg:.3f}, \t'
                'fg/bg=({accuracy_obj.foreground:.1f}/{accuracy_obj.background:.1f})\t'
                'cls_loss: {cls_loss_object.avg:.3f}\t'
                'reg_loss: {reg_loss_object.avg:.3f}'.format(
                    epoch,
                    i + 1,
                    len(train_loader),
                    batch_time=batch_time,
                    lr=args.lr,
                    data_time=data_time,
                    loss=train_loss,
                    cls_loss_object=train_loss_obj_entropy,
                    reg_loss_object=train_loss_obj_box,
                    accuracy_obj=accuracy_obj))
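network.AverageMeter follows the familiar pattern from the PyTorch ImageNet example; a sketch of the usual implementation (the repo's version may differ):

class AverageMeter(object):
    # tracks the latest value and the running average of a scalar
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count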
Example #6
def train(loader,
          model,
          optimizer,
          exp_logger,
          epoch,
          train_all,
          print_freq=100,
          clip_gradient=True,
          iter_size=1):

    model.train()
    meters = exp_logger.reset_meters('train')
    end = time.time()

    for i, sample in enumerate(
            loader):  # (im_data, im_info, gt_objects, gt_relationships)
        # measure the data loading time
        batch_size = sample['visual'].size(0)

        # measure data loading time
        meters['data_time'].update(time.time() - end, n=batch_size)

        input_visual = Variable(sample['visual'].cuda())
        target_objects = sample['objects']
        target_relations = sample['relations']
        image_info = sample['image_info']
        # RPN targets
        rpn_anchor_targets_obj = [
            np_to_variable(sample['rpn_targets']['object'][0],
                           is_cuda=True,
                           dtype=torch.LongTensor),
            np_to_variable(sample['rpn_targets']['object'][1], is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][2], is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][3], is_cuda=True)
        ]
        try:
            # compute output
            model(input_visual, image_info, target_objects, target_relations,
                  rpn_anchor_targets_obj)
            # combine the detection loss with the RPN loss when training end-to-end
            if train_all:
                loss = model.loss + model.rpn.loss * 0.5
            else:
                loss = model.loss

            # log the loss and intermediate values
            meters['loss'].update(model.loss.item(), n=batch_size)
            meters['loss_cls_obj'].update(model.loss_cls_obj.item(),
                                          n=batch_size)
            meters['loss_reg_obj'].update(model.loss_reg_obj.item(),
                                          n=batch_size)
            meters['loss_cls_rel'].update(model.loss_cls_rel.item(),
                                          n=batch_size)
            meters['loss_rpn'].update(model.rpn.loss.item(), n=batch_size)
            meters['batch_time'].update(time.time() - end, n=batch_size)
            meters['epoch_time'].update(meters['batch_time'].val, n=batch_size)

            # add support for iter size
            # special case: last iterations
            if (i + 1) % iter_size == 0 or i == len(loader) - 1:
                loss.backward()
                if clip_gradient:
                    network.clip_gradient(model, 10.)
                else:
                    network.avg_gradient(model, iter_size)
                optimizer.step()
                optimizer.zero_grad()
            else:
                loss.backward()

            end = time.time()
            # Logging the training loss
            if (i + 1) % print_freq == 0:
                print(
                    'Epoch: [{0}][{1}/{2}] '
                    'Batch_Time: {batch_time.avg: .3f}\t'
                    'FRCNN Loss: {loss.avg: .4f}\t'
                    'RPN Loss: {rpn_loss.avg: .4f}\t'.format(
                        epoch,
                        i + 1,
                        len(loader),
                        batch_time=meters['batch_time'],
                        loss=meters['loss'],
                        rpn_loss=meters['loss_rpn']))

                print(
                    '\t[object] loss_cls_obj: {loss_cls_obj.avg:.4f} '
                    'loss_reg_obj: {loss_reg_obj.avg:.4f} '
                    'loss_cls_rel: {loss_cls_rel.avg:.4f} '.format(
                        loss_cls_obj=meters['loss_cls_obj'],
                        loss_reg_obj=meters['loss_reg_obj'],
                        loss_cls_rel=meters['loss_cls_rel'],
                    ))
        except Exception:
            pdb.set_trace()

    exp_logger.log_meters('train', n=epoch)
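The iter_size branch above accumulates gradients over several batches before stepping. A self-contained toy illustration of the same idea on a linear model (loss scaled by 1/iter_size so the accumulated gradients average rather than sum; model and data here are placeholders):

import torch
import torch.nn.functional as F

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
iter_size = 4

optimizer.zero_grad()
for i in range(16):
    x, y = torch.randn(8, 4), torch.randn(8, 1)
    loss = F.mse_loss(model(x), y) / iter_size  # scale for averaging
    loss.backward()  # gradients accumulate across iterations
    if (i + 1) % iter_size == 0:
        optimizer.step()
        optimizer.zero_grad()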
Example #7
    def forward(self, im_data, boxes, rel_boxes, SpatialFea, classes, ix1, ix2,
                args):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)
        boxes = network.np_to_variable(boxes, is_cuda=True)
        rel_boxes = network.np_to_variable(rel_boxes, is_cuda=True)
        SpatialFea = network.np_to_variable(SpatialFea, is_cuda=True)
        classes = network.np_to_variable(classes,
                                         is_cuda=True,
                                         dtype=torch.LongTensor)
        ix1 = network.np_to_variable(ix1, is_cuda=True, dtype=torch.LongTensor)
        ix2 = network.np_to_variable(ix2, is_cuda=True, dtype=torch.LongTensor)

        x = self.conv1(im_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        # per-object branch: ROI-pool each object box for classification
        x_so = self.roi_pool(x, boxes)
        x_so = x_so.view(x_so.size(0), -1)
        x_so = self.fc6(x_so)
        x_so = F.dropout(x_so, training=self.training)
        x_so = self.fc7(x_so)
        x_so = F.dropout(x_so, training=self.training)
        obj_score = self.fc_obj(x_so)
        x_so = self.fc8(x_so)

        # union-box branch: ROI-pool each subject-object union region
        x_u = self.roi_pool(x, rel_boxes)
        x = x_u.view(x_u.size(0), -1)
        x = self.fc6(x)
        x = F.dropout(x, training=self.training)
        x = self.fc7(x)
        x = F.dropout(x, training=self.training)
        x = self.fc8(x)

        if args.use_so:
            # fuse subject and object appearance features into the pair feature
            x_s = torch.index_select(x_so, 0, ix1)
            x_o = torch.index_select(x_so, 0, ix2)
            x_so = torch.cat((x_s, x_o), 1)
            x_so = self.fc_so(x_so)
            x = torch.cat((x, x_so), 1)

        if args.loc_type == 1:
            # spatial feature given as a flat vector
            lo = self.fc_lov(SpatialFea)
            x = torch.cat((x, lo), 1)
        elif args.loc_type == 2:
            # spatial feature given as a 2-D map, encoded by a small conv net
            lo = self.conv_lo(SpatialFea)
            lo = lo.view(lo.size(0), -1)
            lo = self.fc_lov(lo)
            x = torch.cat((x, lo), 1)

        if args.use_obj:
            # embed the predicted classes and fuse subject/object embeddings
            emb = self.emb(classes)
            emb = torch.squeeze(emb, 1)
            emb_s = torch.index_select(emb, 0, ix1)
            emb_o = torch.index_select(emb, 0, ix2)
            emb_so = torch.cat((emb_s, emb_o), 1)
            emb_so = self.fc_so_emb(emb_so)
            x = torch.cat((x, emb_so), 1)

        x = self.fc_fusion(x)
        rel_score = self.fc_rel(x)
        return obj_score, rel_score
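ix1/ix2 select the subject and object rows for each relationship pair. A tiny standalone illustration of the gather-and-concatenate step (values here are made up):

import torch

feats = torch.randn(5, 16)      # one feature row per detected object
ix1 = torch.tensor([0, 0, 2])   # subject index of each pair
ix2 = torch.tensor([1, 3, 4])   # object index of each pair
pair = torch.cat((torch.index_select(feats, 0, ix1),
                  torch.index_select(feats, 0, ix2)), 1)  # shape (3, 32)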