Example #1
    def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors,
                         masks):
        """ Crops the gt masks using the predicted bboxes, scales them down, and outputs the BCE loss. """
        loss_m = 0
        for idx in range(mask_data.shape[0]):
            with jt.no_grad():
                cur_pos_idx = pos_idx[idx]
                cur_pos_idx_squeezed = cur_pos_idx[:, 1]

                # Shape: [num_priors, 4], decoded predicted bboxes
                pos_bboxes = decode(loc_data[idx], priors.data,
                                    cfg.use_yolo_regressors)
                pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1)
                pos_lookup = idx_t[idx, cur_pos_idx_squeezed]

                cur_masks = masks[idx]
                pos_masks = cur_masks[pos_lookup]

                # Convert bboxes to absolute coordinates
                num_pos, img_height, img_width = pos_masks.shape

                # Take care of all the bad behavior that can be caused by out of bounds coordinates
                x1, x2 = sanitize_coordinates(pos_bboxes[:, 0],
                                              pos_bboxes[:, 2], img_width)
                y1, y2 = sanitize_coordinates(pos_bboxes[:, 1],
                                              pos_bboxes[:, 3], img_height)

                # Crop each gt mask with the predicted bbox and rescale to the predicted mask size
                # Note that each bounding box crop is a different size so I don't think we can vectorize this
                scaled_masks = []
                for jdx in range(num_pos):
                    tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]]

                    # Restore any dimensions we've left out because our bbox was 1px wide
                    while tmp_mask.ndim < 2:
                        tmp_mask = tmp_mask.unsqueeze(0)

                    new_mask = nn.AdaptiveAvgPool2d(cfg.mask_size)(
                        tmp_mask.unsqueeze(0))
                    scaled_masks.append(new_mask.view(1, -1))

                mask_t = (jt.contrib.concat(scaled_masks, 0) >
                          0.5).float()  # Threshold downsampled mask

            pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :]
            loss_m += nn.bce_loss(jt.clamp(pos_mask_data, 0, 1),
                                  mask_t,
                                  size_average=False) * cfg.mask_alpha

        return loss_m
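
sanitize_coordinates is not defined in this snippet. A minimal sketch of the usual YOLACT-style helper it refers to (an assumption; the real implementation may differ): order each coordinate pair, scale relative coordinates to pixels, and clamp to the image.

import jittor as jt

def sanitize_coordinates(_x1, _x2, img_size, padding=0, cast=True):
    # Scale relative [0, 1] coordinates up to absolute pixels
    _x1 = _x1 * img_size
    _x2 = _x2 * img_size
    if cast:
        _x1 = _x1.int32()
        _x2 = _x2.int32()
    # Ensure x1 <= x2, then clamp (with optional padding) to the image bounds
    x1 = jt.minimum(_x1, _x2)
    x2 = jt.maximum(_x1, _x2)
    x1 = (x1 - padding).clamp(0, img_size)
    x2 = (x2 + padding).clamp(0, img_size)
    return x1, x2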
Example #2
 def __init__(self, probs=None, logits=None):
     assert not (probs is None and logits is None)
     if probs is None:
         # note: this does not exactly match PyTorch's behavior
         probs = jt.sigmoid(logits)
     probs = probs / probs.sum(-1, True)
     if logits is None:
         logits = jt.safe_log(probs)
     with jt.no_grad():
         self.probs = probs
         self.logits = logits
         self.cum_probs = simple_presum(self.probs)
         self.cum_probs_l = self.cum_probs[..., :-1]
         self.cum_probs_r = self.cum_probs[..., 1:]
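
The left/right cumulative bins above can be used to draw samples: a uniform value u falls into category k exactly when cum_probs_l[k] <= u < cum_probs_r[k]. A minimal sketch of that idea (an assumption about how the surrounding class uses these buffers, not code taken from it):

import jittor as jt

def sample_from_bins(cum_probs_l, cum_probs_r, shape=()):
    # Uniform draw in [0, 1), with a trailing axis to broadcast over categories
    u = jt.random(shape + (1,))
    # Counting how many right edges lie at or below u gives the bin index directly
    return (u >= cum_probs_r).float().sum(-1).int32()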
Example #3
    def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t,
                             priors):
        """
        Instead of using softmax, use class[0] to be p(obj) * p(IoU) as in YOLO.
        Then for the rest of the classes, softmax them and apply CE for only the positive examples.
        """

        conf_t = conf_t.view(-1)  # [batch_size*num_priors]
        conf_data = conf_data.view(
            -1, conf_data.shape[-1])  # [batch_size*num_priors, num_classes]

        pos_mask = (conf_t > 0)
        neg_mask = (conf_t == 0)

        obj_data = conf_data[:, 0]
        obj_data_pos = obj_data[pos_mask]
        obj_data_neg = obj_data[neg_mask]

        # Don't be confused, this is just binary cross entropy simplified
        obj_neg_loss = -nn.log_sigmoid(-obj_data_neg).sum()

        with jt.no_grad():
            pos_priors = priors.unsqueeze(0).expand(batch_size, -1,
                                                    -1).reshape(-1,
                                                                4)[pos_mask]

            boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors)
            boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors)

            iou_targets = elemwise_box_iou(boxes_pred, boxes_targ)

        obj_pos_loss = -iou_targets * nn.log_sigmoid(obj_data_pos) - (
            1 - iou_targets) * nn.log_sigmoid(-obj_data_pos)
        obj_pos_loss = obj_pos_loss.sum()

        # All that was the objectness loss -- now time for the class confidence loss
        conf_data_pos = (
            conf_data[:, 1:])[pos_mask]  # Now this has just 80 classes
        conf_t_pos = conf_t[pos_mask] - 1  # So subtract 1 here

        class_loss = nn.cross_entropy_loss(conf_data_pos,
                                           conf_t_pos,
                                           size_average=False)

        return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss)
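
The negative-example term above relies on the identity BCE(sigmoid(x), 0) = -log_sigmoid(-x). A quick standalone check of that identity in plain NumPy (not part of the snippet):

import numpy as np

def log_sigmoid(x):
    # numerically stable log(sigmoid(x))
    return np.minimum(x, 0) - np.log1p(np.exp(-np.abs(x)))

x = np.linspace(-5, 5, 11)                 # raw objectness logits
p = 1.0 / (1.0 + np.exp(-x))               # sigmoid(x)
bce_with_zero_target = -np.log(1.0 - p)    # binary cross entropy against target 0
assert np.allclose(bce_with_zero_target, -log_sigmoid(-x))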
Example #4
def prepare_data(datum, allocation: list = None):
    with jt.no_grad():
        if allocation is None:
            allocation = []
            allocation.append(args.batch_size -
                              sum(allocation))  # The rest might need more/less

        images, (targets, masks, num_crowds) = datum

        cur_idx = 0
        for alloc in allocation:
            for _ in range(alloc):
                images[cur_idx] = gradinator(images[cur_idx])
                targets[cur_idx] = gradinator(targets[cur_idx])
                masks[cur_idx] = gradinator(masks[cur_idx])
                cur_idx += 1

        if cfg.preserve_aspect_ratio:
            # Choose a random size from the batch
            _, h, w = images[random.randint(0, len(images) - 1)].shape

            for idx, (image, target, mask, num_crowd) in enumerate(
                    zip(images, targets, masks, num_crowds)):
                images[idx], targets[idx], masks[idx], num_crowds[idx] \
                    = enforce_size(image, target, mask, num_crowd, w, h)

        cur_idx = 0
        split_images, split_targets, split_masks, split_numcrowds \
            = [[None for alloc in allocation] for _ in range(4)]

        for device_idx, alloc in enumerate(allocation):
            split_images[device_idx] = jt.stack(images[cur_idx:cur_idx +
                                                       alloc],
                                                dim=0)
            split_targets[device_idx] = targets[cur_idx:cur_idx + alloc]
            split_masks[device_idx] = masks[cur_idx:cur_idx + alloc]
            split_numcrowds[device_idx] = num_crowds[cur_idx:cur_idx + alloc]

            cur_idx += alloc

        return split_images[0], split_targets[0], split_masks[
            0], split_numcrowds[0]
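
gradinator is not defined in this snippet; in this Jittor port it is presumably just a small helper that detaches a Var from the autograd graph. A sketch under that assumption:

def gradinator(x):
    # stop gradients from flowing through the input tensor
    return x.stop_grad()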
Example #5
 def _forward_train(self, anchors, objectness, rpn_box_regression, targets):
     if self.cfg.MODEL.RPN_ONLY:
         # When training an RPN-only model, the loss is determined by the
         # predicted objectness and rpn_box_regression values and there is
         # no need to transform the anchors into predicted boxes; this is an
         # optimization that avoids the unnecessary transformation.
         boxes = anchors
     else:
         # For end-to-end models, anchors must be transformed into boxes and
         # sampled into a training batch.
         with jt.no_grad():
             boxes = self.box_selector_train(anchors, objectness,
                                             rpn_box_regression, targets)
     loss_objectness, loss_rpn_box_reg = self.loss_evaluator(
         anchors, objectness, rpn_box_regression, targets)
     losses = {
         "loss_objectness": loss_objectness,
         "loss_rpn_box_reg": loss_rpn_box_reg,
     }
     return boxes, losses
Example #6
def enforce_size(img, targets, masks, num_crowds, new_w, new_h):
    """ Ensures that the image is the given size without distorting aspect ratio. """
    with jt.no_grad():
        _, h, w = img.size()

        if h == new_h and w == new_w:
            return img, targets, masks, num_crowds

        # Resize the image so that it fits within new_w, new_h
        w_prime = new_w
        h_prime = h * new_w / w

        if h_prime > new_h:
            w_prime *= new_h / h_prime
            h_prime = new_h

        w_prime = int(w_prime)
        h_prime = int(h_prime)

        # Do all the resizing
        img = nn.interpolate(img.unsqueeze(0), (h_prime, w_prime),
                             mode='bilinear',
                             align_corners=False)
        img.squeeze_(0)

        # Act like each object is a color channel
        masks = nn.interpolate(masks.unsqueeze(0), (h_prime, w_prime),
                               mode='bilinear',
                               align_corners=False)
        masks.squeeze_(0)

        # Scale bounding boxes (this will put them in the top left corner in the case of padding)
        targets[:, [0, 2]] *= (w_prime / new_w)
        targets[:, [1, 3]] *= (h_prime / new_h)

        # Finally, pad everything to be the new_w, new_h
        pad_dims = (0, new_w - w_prime, 0, new_h - h_prime)
        img = F.pad(img, pad_dims, mode='constant', value=0)
        masks = F.pad(masks, pad_dims, mode='constant', value=0)

        return img, targets, masks, num_crowds
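
A hypothetical call of enforce_size (the shapes and values below are made up for illustration): pad/resize a 3x480x640 image, its relative-coordinate boxes, and one instance mask to 550x550.

import jittor as jt

img = jt.random((3, 480, 640))
targets = jt.array([[0.1, 0.2, 0.5, 0.6, 1.0]])   # [x1, y1, x2, y2, label] in relative coords
masks = jt.random((1, 480, 640))

img, targets, masks, num_crowds = enforce_size(img, targets, masks, 0, 550, 550)
print(img.shape, masks.shape)   # (3, 550, 550), (1, 550, 550)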
Example #7
    def execute(self, features, proposals, targets=None):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.

        Returns:
            x (Tensor): the result of the feature extractor
            proposals (list[BoxList]): during training, the subsampled proposals
                are returned. During testing, the predicted boxlists are returned
            losses (dict[Tensor]): During training, returns the losses for the
                head. During testing, returns an empty dict.
        """
        if self.is_training():
            # During training, Faster R-CNN subsamples the proposals with a fixed
            # positive / negative ratio

            with jt.no_grad():
                proposals = self.loss_evaluator.subsample(proposals, targets)
        # extract features that will be fed to the final classifier. The
        # feature_extractor generally corresponds to the pooler + heads

        x = self.feature_extractor(features, proposals)

        # final classifier that converts the features into predictions
        class_logits, box_regression = self.predictor(x)

        if not self.is_training():
            result = self.post_processor((class_logits, box_regression),
                                         proposals)
            return x, result, {}

        loss_classifier, loss_box_reg = self.loss_evaluator([class_logits],
                                                            [box_regression])
        return (
            x,
            proposals,
            dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg),
        )
Example #8
def compute_validation_map(epoch,
                           iteration,
                           yolact_net,
                           dataset,
                           log: Log = None):
    with jt.no_grad():
        yolact_net.eval()

        start = time.time()
        print()
        print("Computing validation mAP (this may take a while)..", flush=True)
        val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True)
        end = time.time()

        if log is not None:
            log.log('val',
                    val_info,
                    elapsed=(end - start),
                    epoch=epoch,
                    iter=iteration)

        yolact_net.train()
Example #9
    def semantic_segmentation_loss(self, segment_data, mask_t, class_t, interpolation_mode='bilinear'):
        # Note: num_classes here excludes the background class, i.e. it is cfg.num_classes - 1
        batch_size, num_classes, mask_h, mask_w = segment_data.shape
        loss_s = 0
        
        for idx in range(batch_size):
            cur_segment = segment_data[idx]
            cur_class_t = class_t[idx]

            with jt.no_grad():
                downsampled_masks = nn.interpolate(mask_t[idx].unsqueeze(0), (mask_h, mask_w),
                                                  mode=interpolation_mode, align_corners=False).squeeze(0)
                downsampled_masks = (downsampled_masks>0.5).float()
                
                # Construct Semantic Segmentation
                segment_t = jt.zeros_like(cur_segment)
                segment_t.stop_grad()
                for obj_idx in range(downsampled_masks.shape[0]):
                    segment_t[cur_class_t[obj_idx]] = jt.maximum(segment_t[cur_class_t[obj_idx]], downsampled_masks[obj_idx])
            
            loss_s += nn.BCEWithLogitsLoss(size_average=False)(cur_segment, segment_t)
        
        return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha
Example #10
def compute_on_dataset(model, data_loader, bbox_aug, timer=None):
    model.eval()
    results_dict = {}
    data_loader.is_train=False
    data_loader.num_workers = 4
    start_time = 0
    import cProfile as profiler
    for i, batch in enumerate(tqdm(data_loader)):
        if i==20:
            # For a fair comparison, exclude Jittor's compilation time
            start_time = time.time()
            # jt.profiler.start()

        with nvtx_scope("preprocess"):
            images, image_sizes, image_ids = batch
            # images = ImageList(jt.array(images),image_sizes)
        with nvtx_scope("model"):
            with jt.no_grad():
                if timer:
                    timer.tic()
                if bbox_aug:
                    output = im_detect_bbox_aug(model, images)
                else:
                    output = model(images)
                if timer:
                    timer.toc()
        with nvtx_scope("detach"):
            output = detach_output(output)
            results_dict.update(
                    {img_id: result for img_id, result in zip(image_ids, output)}
                )
    
    end_time = time.time()
    print('fps',(5000-20*data_loader.batch_size)/(end_time-start_time))

    return results_dict
Example #11
    def sample(self, n):
        shape = (n, ) + self.loc.shape
        with jt.no_grad():
            eps = jt.randn(shape)

            return self.loc + self.scale * eps
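
A minimal sketch of the surrounding class this sample() presumably belongs to: a location-scale Normal distribution whose noise is drawn under no_grad so the samples carry no gradient (assumed context, not taken from the snippet).

import jittor as jt

class Normal:
    def __init__(self, loc, scale):
        self.loc, self.scale = loc, scale

    def sample(self, n):
        shape = (n,) + tuple(self.loc.shape)
        with jt.no_grad():
            eps = jt.randn(shape)              # standard normal noise
            return self.loc + self.scale * eps

dist = Normal(jt.zeros((3,)), jt.ones((3,)))
print(dist.sample(5).shape)                    # (5, 3)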
Example #12
def validate():
    bs = 256
    # create model
    model = create_model('vit_base_patch16_224',
                         pretrained=True,
                         num_classes=1000)
    criterion = nn.CrossEntropyLoss()

    dataset = create_val_dataset(root='/data/imagenet',
                                 batch_size=bs,
                                 num_workers=4,
                                 img_size=224)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    with jt.no_grad():
        input = jt.random((bs, 3, 224, 224))
        model(input)

        end = time.time()
        for batch_idx, (input, target) in enumerate(dataset):
            # dataset.display_worker_status()
            batch_size = input.shape[0]
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss, batch_size)
            top1.update(acc1, batch_size)
            top5.update(acc5, batch_size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % 10 == 0:
                # jt.sync_all(True)
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Acc@1: {top1.val:>7.3f} ({top1.avg:>7.3f})  '
                    'Acc@5: {top5.val:>7.3f} ({top5.avg:>7.3f})'.format(
                        batch_idx,
                        len(dataset),
                        batch_time=batch_time,
                        rate_avg=batch_size / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))

            # if batch_idx>50:break

    top1a, top5a = top1.avg, top5.avg
    top1 = round(top1a, 4)
    top1_err = round(100 - top1a, 4)
    top5 = round(top5a, 4)
    top5_err = round(100 - top5a, 4)

    print(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
        top1, top1_err, top5, top5_err))
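
AverageMeter is not shown in this snippet; a minimal sketch of the usual helper it refers to (latest value, running sum, count, and average):

class AverageMeter:
    """Keeps the latest value and a running average."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count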
Example #13
def compute_on_dataset(model, data_loader, bbox_aug, timer=None):
    model.eval()
    results_dict = {}
    data_loader.is_train = False
    data_loader.num_workers = 4
    start_time = 0
    # jt.profiler.start(0, 0)
    for i, batch in enumerate(tqdm(data_loader)):
        # data_loader.display_worker_status()

        #if i<125:continue
        #jt.sync_all()
        #print(1,time.asctime())
        #jt.display_memory_info()
        #if i<187:continue
        # if i>50:break
        # if i==0:continue
        if i == 20:
            # For a fair comparison, exclude Jittor's compilation time
            start_time = time.time()

        # with nvtx_scope("preprocess"):
        #     images, targets, image_ids = batch
        #     new_targets = []
        #     new_images = []
        #     # transforms= data_loader._transforms
        #     for image,target in zip(images,targets):
        #         # print(target.bbox)
        #         # print(target.get_field('labels'))
        #         labels = target.get_field('labels')
        #         labels = jt.array(labels)
        #         # print(labels)

        #         target.add_field('labels',labels)

        #         target.to_jittor()
        #         target = target.convert('xyxy')
        #         if target.has_field('masks'):
        #             target.get_field('masks').to_jittor()
        #         target = target.clip_to_image(remove_empty=True)
        #         # with nvtx_scope("transforms"):
        #         #     if transforms is not None:
        #         #         image,target = transforms(image,target)
        #         new_images.append(jt.array(image))
        #         new_targets.append(target)

        #     images = to_image_list(new_images,data_loader.collate_batch.size_divisible)
        #     targets = new_targets

        #     images.tensors = images.tensors.float32()

        # with nvtx_scope("preprocess"):
        #     images, image_sizes, image_ids = batch
        #     images = ImageList(images,image_sizes)
        # # print('Model!!!!')
        # with nvtx_scope("model"):
        #     with jt.no_grad():
        #         if timer:
        #             timer.tic()
        #         if bbox_aug:
        #             output = im_detect_bbox_aug(model, images)
        #         else:
        #             output = model(images)
        #         if timer:
        #             timer.toc()
        # # print('Model Finished')
        # # jt.sync_all(True)
        # with nvtx_scope("get_data"):
        #     output = detach_output(output)
        #     results_dict.update(
        #             {img_id: result for img_id, result in zip(image_ids, output)}
        #         )
        #     #jt.sync_all()
        #     #print(7,time.asctime())
        #     #jt.fetch(image_ids, output, lambda image_ids, output: \
        #     #    results_dict.update(
        #     #        {img_id: result for img_id, result in zip(image_ids, output)}
        #     #    )
        #     #)
        images, image_sizes, image_ids = batch
        images = ImageList(jt.array(images), image_sizes)
        # print(images.tensors.mean(),images.tensors.shape,image_sizes)
        # print(image_ids)
        # images = to_image_list(images,data_loader.collate_batch.size_divisible)
        # images.tensors = images.tensors.float32()
        with jt.no_grad():
            if timer:
                timer.tic()
            if bbox_aug:
                output = im_detect_bbox_aug(model, images)
            else:
                output = model(images)
            if timer:
                timer.toc()
        # jt.sync_all(True)
        output = detach_output(output)
        results_dict.update(
            {img_id: result
             for img_id, result in zip(image_ids, output)})

    end_time = time.time()
    print('fps',
          (5000 - 20 * data_loader.batch_size) / (end_time - start_time))
    #jt.sync_all()

    # jt.profiler.stop()
    # jt.profiler.report()

    return results_dict
Example #14
def postprocess(det_output,
                w,
                h,
                batch_idx=0,
                interpolation_mode='bilinear',
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0):
    """
    Postprocesses the output of Yolact in testing mode into a format that makes sense,
    accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see jt.nn.interpolate)

    Returns 4 jt Tensors (in the following order):
        - classes [num_det]: The class idx for each detection.
        - scores  [num_det]: The confidence score for each detection.
        - boxes   [num_det, 4]: The bounding box for each detection in absolute point form.
        - masks   [num_det, h, w]: Full image masks for each detection.
    """

    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']

    if dets is None:
        return [jt.array([])
                ] * 4  # Warning, this is 4 copies of the same thing

    if score_threshold > 0:
        keep = dets['score'] > score_threshold

        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]

        if dets['score'].shape[0] == 0:
            return [jt.array([])] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        # At this point, masks holds only the mask coefficients
        proto_data = dets['proto']

        # Test flag, do not upvote
        if cfg.mask_proto_debug:
            np.save('scripts/proto.npy', proto_data.numpy())

        if visualize_lincomb:
            display_lincomb(proto_data, masks)

        masks = jt.matmul(proto_data, masks.transpose(1, 0))
        masks = cfg.mask_proto_mask_activation(masks)

        # Crop masks before upsampling because you know why
        if crop_masks:
            masks = crop(masks, boxes)

        # Permute into the correct output shape [num_dets, proto_h, proto_w]
        masks = masks.permute(2, 0, 1)

        if cfg.use_maskiou:
            with timer.env('maskiou_net'):
                with jt.no_grad():
                    maskiou_p = net.maskiou_net(masks.unsqueeze(1))
                    maskiou_p = jt.gather(
                        maskiou_p, dim=1,
                        index=classes.unsqueeze(1)).squeeze(1)
                    if cfg.rescore_mask:
                        if cfg.rescore_bbox:
                            scores = scores * maskiou_p
                        else:
                            scores = [scores, scores * maskiou_p]

        # Scale masks up to the full image
        masks = nn.interpolate(masks.unsqueeze(0), (h, w),
                               mode=interpolation_mode,
                               align_corners=False).squeeze(0)

        # Binarize the masks
        masks = masks > 0.5

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0],
                                                    boxes[:, 2],
                                                    w,
                                                    cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1],
                                                    boxes[:, 3],
                                                    h,
                                                    cast=False)
    boxes = boxes.int32()

    if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch:
        # Upscale masks
        full_masks = jt.zeros(masks.shape[0], h, w)

        for jdx in range(masks.shape[0]):
            x1, y1, x2, y2 = boxes[jdx]

            mask_w = x2 - x1
            mask_h = y2 - y1

            # Just in case
            if mask_w * mask_h <= 0 or mask_w < 0:
                continue

            mask = masks[jdx].view(1, 1, cfg.mask_size, cfg.mask_size)
            mask = nn.interpolate(mask, (mask_h, mask_w),
                                  mode=interpolation_mode,
                                  align_corners=False)
            mask = (mask > 0.5).float()
            full_masks[jdx, y1:y2, x1:x2] = mask

        masks = full_masks

    return classes, scores, boxes, masks
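
crop() (used above to zero out everything outside each predicted box) is not shown in this snippet. A sketch of the usual YOLACT-style helper, assuming masks of shape [h, w, n] and relative-coordinate boxes; the sanitize_coordinates it calls is sketched after Example #1.

import jittor as jt

def crop(masks, boxes, padding=1):
    h, w, n = masks.shape
    x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False)
    y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False)

    cols = jt.arange(w).view(1, -1, 1)      # (1, w, 1) column index
    rows = jt.arange(h).view(-1, 1, 1)      # (h, 1, 1) row index

    keep = (cols >= x1.view(1, 1, -1)).float() * (cols < x2.view(1, 1, -1)).float() * \
           (rows >= y1.view(1, 1, -1)).float() * (rows < y2.view(1, 1, -1)).float()
    return masks * keep                      # zero out mask values outside each box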
Example #15
def train():

    parser = config_parser()
    args = parser.parse_args()

    # Load data
    intrinsic = None
    if args.dataset_type == 'llff':
        images, poses, bds, render_poses, i_test = load_llff_data(
            args.datadir,
            args.factor,
            recenter=True,
            bd_factor=.75,
            spherify=args.spherify)
        hwf = poses[0, :3, -1]
        poses = poses[:, :3, :4]
        print('Loaded llff', images.shape, render_poses.shape, hwf,
              args.datadir)
        if not isinstance(i_test, list):
            i_test = [i_test]

        if args.llffhold > 0:
            print('Auto LLFF holdout,', args.llffhold)
            i_test = np.arange(images.shape[0])[::args.llffhold]

        i_val = i_test
        i_train = np.array([
            i for i in np.arange(int(images.shape[0]))
            if (i not in i_test and i not in i_val)
        ])

        print('DEFINING BOUNDS')
        if args.no_ndc:
            near = np.ndarray.min(bds) * .9
            far = np.ndarray.max(bds) * 1.

        else:
            near = 0.
            far = 1.
        print('NEAR FAR', near, far)

    elif args.dataset_type == 'blender':
        testskip = args.testskip
        faketestskip = args.faketestskip
        if jt.mpi and jt.mpi.local_rank() != 0:
            testskip = faketestskip
            faketestskip = 1
        if args.do_intrinsic:
            images, poses, intrinsic, render_poses, hwf, i_split = load_blender_data(
                args.datadir, args.half_res, args.testskip,
                args.blender_factor, True)
        else:
            images, poses, render_poses, hwf, i_split = load_blender_data(
                args.datadir, args.half_res, args.testskip,
                args.blender_factor)
        print('Loaded blender', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split
        i_test_tot = i_test
        i_test = i_test[::args.faketestskip]

        near = args.near
        far = args.far
        print(args.do_intrinsic)
        print("hwf", hwf)
        print("near", near)
        print("far", far)

        if args.white_bkgd:
            images = images[..., :3] * images[..., -1:] + (1. -
                                                           images[..., -1:])
        else:
            images = images[..., :3]

    elif args.dataset_type == 'deepvoxels':

        images, poses, render_poses, hwf, i_split = load_dv_data(
            scene=args.shape, basedir=args.datadir, testskip=args.testskip)

        print('Loaded deepvoxels', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split

        hemi_R = np.mean(np.linalg.norm(poses[:, :3, -1], axis=-1))
        near = hemi_R - 1.
        far = hemi_R + 1.

    else:
        print('Unknown dataset type', args.dataset_type, 'exiting')
        return

    # Cast intrinsics to right types
    H, W, focal = hwf
    H, W = int(H), int(W)
    hwf = [H, W, focal]

    render_poses = np.array(poses[i_test])

    # Create log dir and copy the config file
    basedir = args.basedir
    expname = args.expname
    os.makedirs(os.path.join(basedir, expname), exist_ok=True)
    f = os.path.join(basedir, expname, 'args.txt')
    with open(f, 'w') as file:
        for arg in sorted(vars(args)):
            attr = getattr(args, arg)
            file.write('{} = {}\n'.format(arg, attr))
    if args.config is not None:
        f = os.path.join(basedir, expname, 'config.txt')
        with open(f, 'w') as file:
            file.write(open(args.config, 'r').read())

    # Create nerf model
    render_kwargs_train, render_kwargs_test, start, grad_vars, optimizer = create_nerf(
        args)
    global_step = start

    bds_dict = {
        'near': near,
        'far': far,
    }
    render_kwargs_train.update(bds_dict)
    render_kwargs_test.update(bds_dict)

    # Move testing data to GPU
    render_poses = jt.array(render_poses)

    # Short circuit if only rendering out from trained model
    if args.render_only:
        print('RENDER ONLY')
        with jt.no_grad():
            testsavedir = os.path.join(
                basedir, expname, 'renderonly_{}_{:06d}'.format(
                    'test' if args.render_test else 'path', start))
            os.makedirs(testsavedir, exist_ok=True)
            print('test poses shape', render_poses.shape)

            rgbs, _ = render_path(render_poses,
                                  hwf,
                                  args.chunk,
                                  render_kwargs_test,
                                  savedir=testsavedir,
                                  render_factor=args.render_factor)
            print('Done rendering', testsavedir)
            imageio.mimwrite(os.path.join(testsavedir, 'video.mp4'),
                             to8b(rgbs),
                             fps=30,
                             quality=8)

            return

    # Prepare raybatch tensor if batching random rays
    accumulation_steps = 1
    N_rand = args.N_rand // accumulation_steps
    use_batching = not args.no_batching
    if use_batching:
        # For random ray batching
        print('get rays')
        rays = np.stack(
            [get_rays_np(H, W, focal, p) for p in poses[:, :3, :4]],
            0)  # [N, ro+rd, H, W, 3]
        print('done, concats')
        rays_rgb = np.concatenate([rays, images[:, None]],
                                  1)  # [N, ro+rd+rgb, H, W, 3]
        rays_rgb = np.transpose(rays_rgb,
                                [0, 2, 3, 1, 4])  # [N, H, W, ro+rd+rgb, 3]
        rays_rgb = np.stack([rays_rgb[i] for i in i_train],
                            0)  # train images only
        rays_rgb = np.reshape(rays_rgb,
                              [-1, 3, 3])  # [(N-1)*H*W, ro+rd+rgb, 3]
        rays_rgb = rays_rgb.astype(np.float32)
        print('shuffle rays')
        np.random.shuffle(rays_rgb)

        print('done')
        i_batch = 0

    # Move training data to GPU
    images = jt.array(images.astype(np.float32))
    poses = jt.array(poses)
    if use_batching:
        rays_rgb = jt.array(rays_rgb)

    N_iters = 51000
    print('Begin')
    print('TRAIN views are', i_train)
    print('TEST views are', i_test)
    print('VAL views are', i_val)

    # Summary writers
    # writer = SummaryWriter(os.path.join(basedir, 'summaries', expname))
    if not jt.mpi or jt.mpi.local_rank() == 0:
        date = str(datetime.datetime.now())
        date = date[:date.rfind(":")].replace("-", "")\
                                        .replace(":", "")\
                                        .replace(" ", "_")
        gpu_idx = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
        log_dir = os.path.join("./logs", "summaries",
                               "log_" + date + "_gpu" + gpu_idx)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        writer = SummaryWriter(log_dir=log_dir)

    start = start + 1
    for i in trange(start, N_iters):
        # jt.display_memory_info()
        time0 = time.time()

        # Sample random ray batch
        if use_batching:
            # Random over all images
            batch = rays_rgb[i_batch:i_batch + N_rand]  # [B, 2+1, 3*?]
            batch = jt.transpose(batch, (1, 0, 2))
            batch_rays, target_s = batch[:2], batch[2]

            i_batch += N_rand
            if i_batch >= rays_rgb.shape[0]:
                print("Shuffle data after an epoch!")
                rand_idx = jt.randperm(rays_rgb.shape[0])
                rays_rgb = rays_rgb[rand_idx]
                i_batch = 0

        else:
            # Random from one image
            np.random.seed(i)
            img_i = np.random.choice(i_train)
            target = images[img_i]  #.squeeze(0)
            pose = poses[img_i, :3, :4]  #.squeeze(0)
            if N_rand is not None:
                rays_o, rays_d = pinhole_get_rays(
                    H, W, focal, pose, intrinsic)  # (H, W, 3), (H, W, 3)
                if i < args.precrop_iters:
                    dH = int(H // 2 * args.precrop_frac)
                    dW = int(W // 2 * args.precrop_frac)
                    coords = jt.stack(
                        jt.meshgrid(
                            jt.linspace(H // 2 - dH, H // 2 + dH - 1, 2 * dH),
                            jt.linspace(W // 2 - dW, W // 2 + dW - 1, 2 * dW)),
                        -1)
                    if i == start:
                        print(
                            f"[Config] Center cropping of size {2*dH} x {2*dW} is enabled until iter {args.precrop_iters}"
                        )
                else:
                    coords = jt.stack(
                        jt.meshgrid(jt.linspace(0, H - 1, H),
                                    jt.linspace(0, W - 1, W)), -1)  # (H, W, 2)

                coords = jt.reshape(coords, [-1, 2])  # (H * W, 2)
                select_inds = np.random.choice(coords.shape[0],
                                               size=[N_rand],
                                               replace=False)  # (N_rand,)
                select_coords = coords[select_inds].int()  # (N_rand, 2)
                rays_o = rays_o[select_coords[:, 0],
                                select_coords[:, 1]]  # (N_rand, 3)
                rays_d = rays_d[select_coords[:, 0],
                                select_coords[:, 1]]  # (N_rand, 3)
                batch_rays = jt.stack([rays_o, rays_d], 0)
                target_s = target[select_coords[:, 0],
                                  select_coords[:, 1]]  # (N_rand, 3)

        #####  Core optimization loop  #####
        rgb, disp, acc, extras = render(H,
                                        W,
                                        focal,
                                        chunk=args.chunk,
                                        rays=batch_rays,
                                        verbose=i < 10,
                                        retraw=True,
                                        **render_kwargs_train)
        img_loss = img2mse(rgb, target_s)
        trans = extras['raw'][..., -1]
        loss = img_loss
        psnr = mse2psnr(img_loss)

        if 'rgb0' in extras:
            img_loss0 = img2mse(extras['rgb0'], target_s)
            loss = loss + img_loss0
            psnr0 = mse2psnr(img_loss0)

        optimizer.backward(loss / accumulation_steps)
        if i % accumulation_steps == 0:
            optimizer.step()

        ###   update learning rate   ###
        decay_rate = 0.1
        decay_steps = args.lrate_decay * accumulation_steps * 1000
        new_lrate = args.lrate * (decay_rate**(global_step / decay_steps))
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_lrate
        ################################

        dt = time.time() - time0

        # Rest is logging
        if (i + 1) % args.i_weights == 0 and (not jt.mpi
                                              or jt.mpi.local_rank() == 0):
            print(i)
            path = os.path.join(basedir, expname, '{:06d}.tar'.format(i))
            jt.save(
                {
                    'global_step':
                    global_step,
                    'network_fn_state_dict':
                    render_kwargs_train['network_fn'].state_dict(),
                    'network_fine_state_dict':
                    render_kwargs_train['network_fine'].state_dict(),
                }, path)
            print('Saved checkpoints at', path)

        if i % args.i_video == 0 and i > 0:
            # Turn on testing mode
            with jt.no_grad():
                rgbs, disps = render_path(render_poses,
                                          hwf,
                                          args.chunk,
                                          render_kwargs_test,
                                          intrinsic=intrinsic)
            if not jt.mpi or jt.mpi.local_rank() == 0:
                print('Done, saving', rgbs.shape, disps.shape)
                moviebase = os.path.join(
                    basedir, expname, '{}_spiral_{:06d}_'.format(expname, i))
                print('movie base ', moviebase)
                imageio.mimwrite(moviebase + 'rgb.mp4',
                                 to8b(rgbs),
                                 fps=30,
                                 quality=8)
                imageio.mimwrite(moviebase + 'disp.mp4',
                                 to8b(disps / np.max(disps)),
                                 fps=30,
                                 quality=8)

        if i % args.i_print == 0:
            tqdm.write(
                f"[TRAIN] Iter: {i} Loss: {loss.item()}  PSNR: {psnr.item()}")
            if i % args.i_img == 0:
                img_i = np.random.choice(i_val)
                target = images[img_i]
                pose = poses[img_i, :3, :4]
                with jt.no_grad():
                    rgb, disp, acc, extras = render(H,
                                                    W,
                                                    focal,
                                                    chunk=args.chunk,
                                                    c2w=pose,
                                                    intrinsic=intrinsic,
                                                    **render_kwargs_test)
                psnr = mse2psnr(img2mse(rgb, target))
                rgb = rgb.numpy()
                disp = disp.numpy()
                acc = acc.numpy()

                if not jt.mpi or jt.mpi.local_rank() == 0:
                    writer.add_image('test/rgb',
                                     to8b(rgb),
                                     global_step,
                                     dataformats="HWC")
                    writer.add_image('test/target',
                                     target.numpy(),
                                     global_step,
                                     dataformats="HWC")
                    writer.add_scalar('test/psnr', psnr.item(), global_step)

            jt.clean_graph()
            jt.sync_all()
            jt.gc()

            if i % args.i_testset == 0 and i > 0:
                si_test = i_test_tot if i % args.i_tottest == 0 else i_test
                testsavedir = os.path.join(basedir, expname,
                                           'testset_{:06d}'.format(i))
                os.makedirs(testsavedir, exist_ok=True)
                print('test poses shape', poses[si_test].shape)
                with jt.no_grad():
                    rgbs, disps = render_path(jt.array(poses[si_test]),
                                              hwf,
                                              args.chunk,
                                              render_kwargs_test,
                                              savedir=testsavedir,
                                              intrinsic=intrinsic,
                                              expname=expname)
                jt.gc()
        global_step += 1
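
img2mse, mse2psnr and to8b are not defined in this snippet; the standard NeRF helpers they usually stand for look like this (a sketch, assuming the usual definitions):

import numpy as np
import jittor as jt

img2mse = lambda x, y: jt.mean((x - y) ** 2)                 # mean squared error over pixels
mse2psnr = lambda x: -10.0 * jt.log(x) / np.log(10.0)        # MSE -> PSNR in dB
to8b = lambda x: (255 * np.clip(x, 0, 1)).astype(np.uint8)   # float [0,1] image -> uint8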
Example #16
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    checkpoint_period,
    test_period,
    arguments,
):
    logger = logging.getLogger("detectron.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        
        if any(len(target) < 1 for target in targets):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration


        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = loss_dict
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        optimizer.step(losses)
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=1024 / 1024.0 / 1024.0, # TODO CUDA Memory
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=False, is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            model.train()
            with jt.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = loss_dict
                    losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory= 2014 / 1024.0 / 1024.0,# TODO torch.cuda.max_memory_allocated()
                )
            )
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
Example #17
File: perf.py  Project: yuntaolu/jittor
def test(name, model_name, bs):
    print("hello", name, model_name, bs)
    import numpy as np
    import time
    is_train = False
    _model_name = model_name
    if model_name.startswith("train_"):
        is_train = True
        model_name = model_name[6:]
    if name == "torch":
        import torch
        import torchvision.models as tcmodels
        from torch import optim
        from torch import nn
        torch.backends.cudnn.deterministic = False
        torch.backends.cudnn.benchmark = True
        model = tcmodels.__dict__[model_name]()
        model = model.cuda()
    else:
        import jittor as jt
        from jittor import optim
        from jittor import nn
        jt.flags.use_cuda = 1
        jt.cudnn.set_algorithm_cache_size(10000)
        import jittor.models as jtmodels
        model = jtmodels.__dict__[model_name]()
        if model_name in ("resnet152", "resnet101") and bs == 128 and is_train:
            jt.cudnn.set_max_workspace_ratio(0.05)
    if is_train:
        model.train()
    else:
        model.eval()
    img_size = 224
    if model_name == "inception_v3":
        img_size = 300
    test_img = np.random.random((bs, 3, img_size, img_size)).astype("float32")
    if is_train:
        label = (np.random.random((bs,)) * 1000).astype("int32")
    if name == "torch":
        test_img = torch.Tensor(test_img).cuda()
        if is_train:
            label = torch.LongTensor(label).cuda()
            opt = optim.SGD(model.parameters(), 0.001)
        sync = lambda: torch.cuda.synchronize()
        jt = torch
    else:
        test_img = jt.array(test_img).stop_grad()
        if is_train:
            label = jt.array(label).stop_grad()
            opt = optim.SGD(model.parameters(), 0.001)
        sync = lambda: jt.sync_all(True)

    sync()
    use_profiler = os.environ.get("use_profiler", "0") == "1"
    if hasattr(jt, "nograd"):
        ng = jt.no_grad()
        ng.__enter__()
    def iter():
        x = model(test_img)
        if isinstance(x, tuple):
            x = x[0]
        if is_train:
            loss = nn.CrossEntropyLoss()(x, label)
            if name == "jittor":
                opt.step(loss)
            else:
                opt.zero_grad()
                loss.backward()
                opt.step()
        else:
            x.sync()
    sync()
    for i in time_iter():
        iter()
    sync()
    for i in time_iter():
        iter()
    sync()
    if use_profiler:
        if name == "torch":
            prof = torch.autograd.profiler.profile(use_cuda=True)
        else:
            prof = jt.profile_scope()
        prof.__enter__()
    if name == "jittor":
        if hasattr(jt.flags, "use_parallel_op_compiler"):
            jt.flags.use_parallel_op_compiler = 0
    start = time.time()
    for i in time_iter(10):
        iter()
    sync()
    end = time.time()
    if use_profiler:
        prof.__exit__(None,None,None)
        if name == "torch":
            print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=30))
    total_iter = i+1
    print("duration:", end-start, "FPS:", total_iter*bs/(end-start))
    fpath = f"{home_path}/.cache/jittor/{name}-{_model_name}-{bs}.txt"
    with open(fpath, 'w') as f:
        f.write(f"duration: {end-start} FPS: {total_iter*bs/(end-start)}")
    os.chmod(fpath, 0o666)
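
time_iter is not defined in this snippet; a plausible minimal stand-in (an assumption; the real helper may bound by wall time instead) that simply yields iteration indices:

def time_iter(n=100):
    # yield n iteration indices; after the loop, total_iter = i + 1
    for i in range(n):
        yield i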
Example #18
    def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels, interpolation_mode='bilinear'):
        mask_h = proto_data.shape[1]
        mask_w = proto_data.shape[2]


        process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop

        if cfg.mask_proto_remove_empty_masks:
            # Make sure to store a copy of this because we edit it to get rid of all-zero masks
            pos = pos.clone()

        loss_m = 0
        loss_d = 0 # Coefficient diversity loss

        maskiou_t_list = []
        maskiou_net_input_list = []
        label_t_list = []

        for idx in range(mask_data.shape[0]):
            with jt.no_grad():
                downsampled_masks = nn.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w),
                                                  mode=interpolation_mode, align_corners=False).squeeze(0)
                downsampled_masks = downsampled_masks.permute(1, 2, 0)

                if cfg.mask_proto_binarize_downsampled_gt:
                    downsampled_masks = (downsampled_masks>0.5).float()

                if cfg.mask_proto_remove_empty_masks:
                    # Get rid of gt masks that are so small they get downsampled away
                    very_small_masks = (downsampled_masks.sum(0).sum(0) <= 0.0001)
                    for i in range(very_small_masks.shape[0]):
                        if very_small_masks[i]:
                            pos[idx, idx_t[idx] == i] = 0

                if cfg.mask_proto_reweight_mask_loss:
                    # Ensure that the gt is binary
                    if not cfg.mask_proto_binarize_downsampled_gt:
                        bin_gt = (downsampled_masks>0.5).float()
                    else:
                        bin_gt = downsampled_masks

                    gt_foreground_norm = bin_gt     / (jt.sum(bin_gt,   dim=(0,1), keepdim=True) + 0.0001)
                    gt_background_norm = (1-bin_gt) / (jt.sum(1-bin_gt, dim=(0,1), keepdim=True) + 0.0001)

                    mask_reweighting   = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm
                    mask_reweighting  *= mask_h * mask_w

            cur_pos = pos[idx]
            cur_pos = jt.where(cur_pos)[0]
            pos_idx_t = idx_t[idx, cur_pos]
            
            if process_gt_bboxes:
                # Note: this is in point-form
                if cfg.mask_proto_crop_with_pred_box:
                    pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos]
                else:
                    pos_gt_box_t = gt_box_t[idx, cur_pos]

            if pos_idx_t.shape[0] == 0:
                continue

            proto_masks = proto_data[idx]
            proto_coef  = mask_data[idx, cur_pos, :]
            if cfg.use_mask_scoring:
                mask_scores = score_data[idx, cur_pos, :]

            if cfg.mask_proto_coeff_diversity_loss:
                if inst_data is not None:
                    div_coeffs = inst_data[idx, cur_pos, :]
                else:
                    div_coeffs = proto_coef

                loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)
            
            # If we have over the allowed number of masks, select a random sample
            old_num_pos = proto_coef.shape[0]
            if old_num_pos > cfg.masks_to_train:
                perm = jt.randperm(proto_coef.shape[0])
                select = perm[:cfg.masks_to_train]

                proto_coef = proto_coef[select, :]
                pos_idx_t  = pos_idx_t[select]
                
                if process_gt_bboxes:
                    pos_gt_box_t = pos_gt_box_t[select, :]
                if cfg.use_mask_scoring:
                    mask_scores = mask_scores[select, :]

            num_pos = proto_coef.shape[0]
            mask_t = downsampled_masks[:, :, pos_idx_t]     
            label_t = labels[idx][pos_idx_t]     

            # Size: [mask_h, mask_w, num_pos]
            pred_masks = proto_masks @ proto_coef.transpose(1,0)

            pred_masks = cfg.mask_proto_mask_activation(pred_masks)

            if cfg.mask_proto_double_loss:
                if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                    pre_loss = nn.bce_loss(jt.clamp(pred_masks, 0, 1), mask_t, size_average=False)
                else:
                    pre_loss = nn.smooth_l1_loss(pred_masks, mask_t, reduction='sum')
                
                loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

            if cfg.mask_proto_crop:
                pred_masks = crop(pred_masks, pos_gt_box_t)
            
            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = binary_cross_entropy(jt.clamp(pred_masks, 0, 1), mask_t)
            else:
                pre_loss = nn.smooth_l1_loss(pred_masks, mask_t, reduction='none')

            if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
                gt_area  = jt.sum(mask_t, dim=(0, 1), keepdims=True)
                pre_loss = pre_loss / (jt.sqrt(gt_area) + 0.0001)
            
            if cfg.mask_proto_reweight_mask_loss:
                pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]
            
                
            if cfg.mask_proto_normalize_emulate_roi_pooling:
                weight = mask_h * mask_w if cfg.mask_proto_crop else 1
                pos_gt_csize = center_size(pos_gt_box_t)
                gt_box_width  = pos_gt_csize[:, 2] * mask_w
                gt_box_height = pos_gt_csize[:, 3] * mask_h
                pre_loss = pre_loss.sum(0).sum(0) / gt_box_width / gt_box_height * weight
            

            # If the number of masks were limited scale the loss accordingly
            if old_num_pos > num_pos:
                pre_loss *= old_num_pos / num_pos

            loss_m += jt.sum(pre_loss)

            if cfg.use_maskiou:
                if cfg.discard_mask_area > 0:
                    gt_mask_area = jt.sum(mask_t, dim=(0, 1))
                    select = gt_mask_area > cfg.discard_mask_area

                    if jt.sum(select).item() < 1:
                        continue

                    pos_gt_box_t = pos_gt_box_t[select, :]
                    pred_masks = pred_masks[:, :, select]
                    mask_t = mask_t[:, :, select]
                    label_t = label_t[select]

                maskiou_net_input = pred_masks.permute(2, 0, 1).unsqueeze(1)
                pred_masks = (pred_masks>0.5).float()                
                maskiou_t = self._mask_iou(pred_masks, mask_t)
                
                maskiou_net_input_list.append(maskiou_net_input)
                maskiou_t_list.append(maskiou_t)
                label_t_list.append(label_t)
        
        losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w}

        if cfg.mask_proto_coeff_diversity_loss:
            losses['D'] = loss_d

        if cfg.use_maskiou:
            # discard_mask_area discarded every mask in the batch, so nothing to do here
            if len(maskiou_t_list) == 0:
                return losses, None

            maskiou_t = jt.contrib.concat(maskiou_t_list)
            label_t = jt.contrib.concat(label_t_list)
            maskiou_net_input = jt.contrib.concat(maskiou_net_input_list)

            num_samples = maskiou_t.shape[0]
            if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train:
                perm = jt.randperm(num_samples)
                select = perm[:cfg.maskious_to_train]
                maskiou_t = maskiou_t[select]
                label_t = label_t[select]
                maskiou_net_input = maskiou_net_input[select]

            return losses, [maskiou_net_input, maskiou_t, label_t]

        return losses
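
The use_maskiou branch above computes its regression target through self._mask_iou, which is not shown in this excerpt. A minimal sketch of such a helper, assuming the [H, W, num_pos] mask layout used above:

import jittor as jt

def _mask_iou(self, mask1, mask2):
    # Sketch only: mask1 / mask2 are assumed to be [H, W, num_pos] binary masks,
    # matching pred_masks and mask_t above. Returns one IoU value per instance.
    intersection = jt.sum(mask1 * mask2, dim=(0, 1))
    area1 = jt.sum(mask1, dim=(0, 1))
    area2 = jt.sum(mask2, dim=(0, 1))
    union = area1 + area2 - intersection
    return intersection / (union + 1e-9)  # small eps only to avoid division by zero
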
Example #19
    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with jt.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        if args.image is None and args.video is None and args.images is None:
            # dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info,
            # transform=BaseTransform(), has_gt=cfg.dataset.has_gt)
            dataset = EvalCOCODetection(cfg.dataset.valid_images,
                                        cfg.dataset.valid_info,
                                    transform=BaseTransform(),
                                    has_gt=cfg.dataset.has_gt)
Example #20
    def train(self,
              dataset,
              num_workers,
              epochs,
              batch_sizes,
              fade_in_percentage,
              logger,
              output,
              num_samples=36,
              start_depth=0,
              feedback_factor=100,
              checkpoint_factor=1):
        """
        Utility method for training the GAN. Note that you don't necessarily have to use this;
        you can use optimize_generator and optimize_discriminator in your own training routine.

        :param dataset: object of the dataset used for training.
                        Note that this is not the data loader (we create data loader in this method
                        since the batch_sizes for resolutions can be different)
        :param num_workers: number of workers for reading the data. def=3
        :param epochs: list of number of epochs to train the network for every resolution
        :param batch_sizes: list of batch_sizes for every resolution
        :param fade_in_percentage: list of percentages of epochs per resolution used for fading in the new layer
                                   not used for first resolution, but dummy value still needed.
        :param logger: logger object used to report training progress.
        :param output: output directory for samples, models, and logs.
        :param num_samples: number of samples generated in sample_sheet. def=36
        :param start_depth: start training from this depth. def=0
        :param feedback_factor: number of logs per epoch. def=100
        :param checkpoint_factor: save a checkpoint after every this many epochs.
        :return: None (Writes multiple files to disk)
        """

        assert self.depth <= len(epochs), "epochs not compatible with depth"
        assert self.depth <= len(
            batch_sizes), "batch_sizes not compatible with depth"
        assert self.depth <= len(
            fade_in_percentage), "fade_in_percentage not compatible with depth"

        # turn the generator and discriminator into train mode
        self.gen.train()
        self.dis.train()
        if self.use_ema:
            self.gen_shadow.train()

        # create a global time counter
        global_time = time.time()

        # create fixed_input for debugging
        # fixed_input = torch.randn(num_samples, self.latent_size).to(self.device)
        fixed_input = jt.random([num_samples, self.latent_size], 'float32',
                                'normal').stop_grad()

        # config depend on structure
        logger.info("Starting the training process ... \n")
        if self.structure == 'fixed':
            start_depth = self.depth - 1
        step = 1  # counter for number of iterations
        for current_depth in range(start_depth, self.depth):
            current_res = np.power(2, current_depth + 2)
            logger.info("Currently working on depth: %d", current_depth + 1)
            logger.info("Current resolution: %d x %d" %
                        (current_res, current_res))

            ticker = 1

            # Choose training parameters and configure training ops.
            # TODO
            data = get_data_loader(dataset, batch_sizes[current_depth],
                                   num_workers)

            for epoch in range(1, epochs[current_depth] + 1):
                start = timeit.default_timer(
                )  # record time at the start of epoch

                logger.info("Epoch: [%d]" % epoch)
                # total_batches = len(iter(data))
                total_batches = len(data)

                fade_point = int((fade_in_percentage[current_depth] / 100) *
                                 epochs[current_depth] * total_batches)

                for i, (batch, useless) in enumerate(data, 1):
                    # calculate the alpha for fading in the layers
                    alpha = ticker / fade_point if ticker <= fade_point else 1

                    # extract current batch of data for training
                    # images = batch.to(self.device)
                    # gan_input = torch.randn(images.shape[0], self.latent_size).to(self.device)
                    images = batch
                    gan_input = jt.random([images.shape[0], self.latent_size],
                                          'float32', 'normal').stop_grad()

                    # optimize the discriminator:
                    dis_loss = self.optimize_discriminator(
                        gan_input, images, current_depth, alpha)

                    # optimize the generator:
                    gen_loss = self.optimize_generator(gan_input, images,
                                                       current_depth, alpha)

                    # provide a loss feedback
                    if i % int(total_batches / feedback_factor +
                               1) == 0 or i == 1:
                        elapsed = time.time() - global_time
                        elapsed = str(
                            datetime.timedelta(seconds=elapsed)).split('.')[0]
                        logger.info(
                            "Elapsed: [%s] Step: %d  Batch: %d  D_Loss: %f  G_Loss: %f"
                            % (elapsed, step, i, dis_loss, gen_loss))

                        # create a grid of samples and save it
                        os.makedirs(os.path.join(output, 'samples'),
                                    exist_ok=True)
                        gen_img_file = os.path.join(
                            output, 'samples', "gen_" + str(current_depth) +
                            "_" + str(epoch) + "_" + str(i) + ".png")

                        # with torch.no_grad():
                        with jt.no_grad():
                            self.create_grid(
                                samples=self.gen(fixed_input, current_depth,
                                                 alpha).detach()
                                if not self.use_ema else self.gen_shadow(
                                    fixed_input, current_depth,
                                    alpha).detach(),
                                scale_factor=int(
                                    np.power(2, self.depth - current_depth -
                                             1))
                                if self.structure == 'linear' else 1,
                                img_file=gen_img_file,
                            )

                    # increment the alpha ticker and the step
                    ticker += 1
                    step += 1

                elapsed = timeit.default_timer() - start
                elapsed = str(
                    datetime.timedelta(seconds=elapsed)).split('.')[0]
                logger.info("Time taken for epoch: %s\n" % elapsed)

                if epoch % checkpoint_factor == 0 or epoch == 1 or epoch == epochs[
                        current_depth]:
                    save_dir = os.path.join(output, 'models')
                    os.makedirs(save_dir, exist_ok=True)
                    '''
                    gen_save_file = os.path.join(save_dir, "GAN_GEN_" + str(current_depth) + "_" + str(epoch) + ".pth")
                    dis_save_file = os.path.join(save_dir, "GAN_DIS_" + str(current_depth) + "_" + str(epoch) + ".pth")
                    gen_optim_save_file = os.path.join(
                        save_dir, "GAN_GEN_OPTIM_" + str(current_depth) + "_" + str(epoch) + ".pth")
                    dis_optim_save_file = os.path.join(
                        save_dir, "GAN_DIS_OPTIM_" + str(current_depth) + "_" + str(epoch) + ".pth")
                    '''
                    gen_save_file = os.path.join(
                        save_dir, "GAN_GEN_" + str(current_depth) + "_" +
                        str(epoch) + ".pkl")
                    dis_save_file = os.path.join(
                        save_dir, "GAN_DIS_" + str(current_depth) + "_" +
                        str(epoch) + ".pkl")
                    # torch.save(self.gen.state_dict(), gen_save_file)
                    self.gen.save(gen_save_file)
                    logger.info("Saving the model to: %s\n" % gen_save_file)
                    # torch.save(self.dis.state_dict(), dis_save_file)
                    self.dis.save(dis_save_file)
                    # torch.save(self.gen_optim.state_dict(), gen_optim_save_file)
                    # torch.save(self.dis_optim.state_dict(), dis_optim_save_file)

                    # also save the shadow generator if use_ema is True
                    if self.use_ema:
                        # gen_shadow_save_file = os.path.join(
                        #     save_dir, "GAN_GEN_SHADOW_" + str(current_depth) + "_" + str(epoch) + ".pth")
                        # torch.save(self.gen_shadow.state_dict(), gen_shadow_save_file)
                        gen_shadow_save_file = os.path.join(
                            save_dir, "GAN_GEN_SHADOW_" + str(current_depth) +
                            "_" + str(epoch) + ".pkl")
                        self.gen_shadow.save(gen_shadow_save_file)
                        logger.info("Saving the model to: %s\n" %
                                    gen_shadow_save_file)

        logger.info('Training completed.\n')
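
For reference, a hypothetical invocation of this train() utility; the trainer and dataset names are placeholders, and each per-resolution list must be at least self.depth entries long (depth=7 here, i.e. a final resolution of 256 x 256):

# Hypothetical usage sketch (placeholder names, not from this repository).
trainer.train(
    dataset=image_folder_dataset,          # any dataset object accepted by get_data_loader
    num_workers=3,
    epochs=[4, 4, 4, 8, 8, 16, 32],        # one entry per depth / resolution
    batch_sizes=[128, 128, 64, 32, 16, 8, 4],
    fade_in_percentage=[50, 50, 50, 50, 50, 50, 50],
    logger=logger,
    output='./train_output',               # samples/ and models/ are created inside
    num_samples=36,
    start_depth=0,
    feedback_factor=10,
    checkpoint_factor=10,
)
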
Example #21
    def execute(self, imgs, size=640, augment=False):
        # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/samples/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(720,1280,3)
        #   numpy:           = np.zeros((720,1280,3))  # HWC
        #   jittor:          = jt.zeros((16,3,720,1280))  # BCHW
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        p = next(iter(self.model.parameters()))  # reference parameter (for dtype)
        if isinstance(imgs, jt.Var):  # jittor Var
            return self.model(imgs.cast(p.dtype), augment)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            if isinstance(im, str):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im), im  # open
                im.filename = f  # for uri
            files.append(Path(im.filename).with_suffix('.jpg').name
                         if isinstance(im, Image.Image) else f'image{i}.jpg')
            im = np.array(im)  # to numpy
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose(
                    (1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(
                im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [
            make_divisible(x, int(self.stride.max()))
            for x in np.stack(shape1, 0).max(0)
        ]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0]
             for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = jt.array(x).cast(p.dtype) / 255.  # uint8 to fp16/32

        # Inference
        with jt.no_grad():
            y = self.model(x, augment)[0]  # forward
        y = non_max_suppression(y,
                                conf_thres=self.conf,
                                iou_thres=self.iou,
                                classes=self.classes)  # NMS

        # Post-process
        for i in range(n):
            y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i])

        return Detections(imgs, y, files, self.names)
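
A hypothetical way to call this execute() method; in Jittor, calling the module directly dispatches to execute(), and the names below are placeholders:

# Hypothetical usage sketch (placeholder names, not from this repository).
import cv2
from PIL import Image

model = build_autoshape_wrapper()                       # placeholder for the module defining execute()
dets = model('data/samples/zidane.jpg', size=640)       # filename or URI
dets = model(cv2.imread('image.jpg')[:, :, ::-1])       # OpenCV HWC BGR -> RGB
dets = model([Image.open('im1.jpg'), Image.open('im2.jpg')])  # batch of PIL images; returns a Detections object
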
Example #22
def kaiming_normal_(var, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    std = calculate_std(var, mode, nonlinearity, a)
    with jt.no_grad():
        return gauss_(var, 0, std)
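
calculate_std is defined elsewhere; assuming it implements the usual Kaiming rule std = gain / sqrt(fan), a rough sketch would be:

import math

def calculate_std(var, mode='fan_in', nonlinearity='leaky_relu', a=0):
    # Sketch only (an assumption about the helper used above): std = gain / sqrt(fan).
    receptive_field = 1
    for s in var.shape[2:]:        # spatial dims of conv weights; empty for linear layers
        receptive_field *= s
    fan = (var.shape[1] if mode == 'fan_in' else var.shape[0]) * receptive_field
    if nonlinearity == 'leaky_relu':
        gain = math.sqrt(2.0 / (1 + a ** 2))
    elif nonlinearity == 'relu':
        gain = math.sqrt(2.0)
    else:
        gain = 1.0
    return gain / math.sqrt(fan)
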
Example #23
def test(cfg = None,
         data = None,
         weights=None,
         batch_size=32,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_hybrid=False,  # for hybrid auto-labelling
         save_conf=False,  # save auto-label confidences
         plots=True): 

    # Initialize/load model and set device
    training = model is not None
    if not training:
        # called directly
        set_logging()
        # Directories
        save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = Model(cfg)
        model.load(weights)
        model = model.fuse()
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # Configure
    model.eval()
    is_coco = data.endswith('coco.yaml')  # is COCO dataset
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = jt.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()


    # Dataloader
    if not training:
        img = jt.zeros((1, 3, imgsz, imgsz))  # init img
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True,
                                       prefix=colorstr('test: ' if opt.task == 'test' else 'val: '))

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = jt.zeros((3,))
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.float32()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # no device transfer needed for targets in Jittor
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with jt.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= jt.array([width, height, width, height])  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            t1 += time_synchronized() - t
        
        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((jt.zeros((0, niou), dtype="bool"), jt.array([]), jt.array([]), tcls))
                continue
            
            # Predictions
            predn = pred.clone()
            predn[:, :4] = scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = jt.array(shapes[si][0])[jt.array([1, 0, 1, 0])]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(jt.array(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = jt.zeros((pred.shape[0], niou), dtype="bool")
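            # (`correct` is a [num_pred, niou] boolean matrix with one column per IoU
            # threshold in iouv; a prediction becomes a true positive at every threshold
            # that its best-matching ground-truth IoU exceeds.)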
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox = scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(predn, jt.contrib.concat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in jt.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # prediction indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # target indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        i, ious = box_iou(predn[pi, :4], tbox[ti]).argmax(1)  # jt argmax returns (best indices, best ious)

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.numpy(), pred[:, 4].numpy(), pred[:, 5].numpy(), tcls))
        
        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = np.zeros((1,))

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc <= 20 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.numpy() / len(dataloader)).tolist()), maps, t
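
A hypothetical call with a model and dataloader passed in (placeholder names); supplying both skips the standalone-loading branch and its use of the global opt:

# Hypothetical usage sketch (placeholder names, not from this repository).
results, maps, times = test(data='data/coco.yaml',    # dataset yaml (placeholder path)
                            batch_size=32,
                            imgsz=640,
                            conf_thres=0.001,
                            iou_thres=0.6,
                            model=my_model,           # passing a model selects the in-training branch
                            dataloader=val_loader,
                            save_json=False,
                            plots=False)
mp, mr, map50, map_, box_loss, obj_loss, cls_loss = results
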
Example #24
def kaiming_uniform_(var, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    std = calculate_std(var, mode, nonlinearity, a)
    bound = math.sqrt(3.0) * std
    with jt.no_grad():
        return uniform_(var, -bound, bound)
Example #25
def draw_style_mixing_figure(png, gen, out_depth, src_seeds, dst_seeds,
                             style_ranges):
    n_col = len(src_seeds)
    n_row = len(dst_seeds)
    w = h = 2**(out_depth + 2)
    # with torch.no_grad():
    with jt.no_grad():
        latent_size = gen.g_mapping.latent_size
        src_latents_np = np.stack([
            np.random.RandomState(seed).randn(latent_size, )
            for seed in src_seeds
        ])
        dst_latents_np = np.stack([
            np.random.RandomState(seed).randn(latent_size, )
            for seed in dst_seeds
        ])
        # src_latents = torch.from_numpy(src_latents_np.astype(np.float32))
        # dst_latents = torch.from_numpy(dst_latents_np.astype(np.float32))
        src_latents = jt.array(src_latents_np.astype(np.float32))
        dst_latents = jt.array(dst_latents_np.astype(np.float32))
        src_dlatents = gen.g_mapping(src_latents)  # [seed, layer, component]
        dst_dlatents = gen.g_mapping(dst_latents)  # [seed, layer, component]
        src_images = gen.g_synthesis(src_dlatents, depth=out_depth, alpha=1)
        dst_images = gen.g_synthesis(dst_dlatents, depth=out_depth, alpha=1)

        # src_dlatents_np = src_dlatents.numpy()
        # dst_dlatents_np = dst_dlatents.numpy()
        src_dlatents_np = src_dlatents.data
        dst_dlatents_np = dst_dlatents.data
        canvas = Image.new('RGB', (w * (n_col + 1), h * (n_row + 1)), 'white')
        for col, src_image in enumerate(list(src_images)):
            src_image = adjust_dynamic_range(src_image)
            # src_image = src_image.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy()
            src_image = src_image.multiply(255).clamp(0, 255).permute(
                1, 2, 0).data.astype(np.uint8)
            canvas.paste(Image.fromarray(src_image, 'RGB'), ((col + 1) * w, 0))
        for row, dst_image in enumerate(list(dst_images)):
            dst_image = adjust_dynamic_range(dst_image)
            # dst_image = dst_image.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy()
            dst_image = dst_image.multiply(255).clamp(0, 255).permute(
                1, 2, 0).data.astype(np.uint8)
            canvas.paste(Image.fromarray(dst_image, 'RGB'), (0, (row + 1) * h))

            row_dlatents = np.stack([dst_dlatents_np[row]] * n_col)
            row_dlatents[:, style_ranges[row]] = src_dlatents_np[:, style_ranges[row]]
            # row_dlatents = torch.from_numpy(row_dlatents)
            row_dlatents = jt.array(row_dlatents)

            row_images = gen.g_synthesis(row_dlatents,
                                         depth=out_depth,
                                         alpha=1)
            for col, image in enumerate(list(row_images)):
                image = adjust_dynamic_range(image)
                # image = image.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy()
                image = image.multiply(255).clamp(0, 255).permute(
                    1, 2, 0).data.astype(np.uint8)
                canvas.paste(Image.fromarray(image, 'RGB'),
                             ((col + 1) * w, (row + 1) * h))
        canvas.save(png)
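
adjust_dynamic_range is defined elsewhere; StyleGAN-style code typically uses it to remap generator output from [-1, 1] to [0, 1] before the 255-scaling above. A minimal sketch under that assumption:

def adjust_dynamic_range(img, drange_in=(-1.0, 1.0), drange_out=(0.0, 1.0)):
    # Sketch only (an assumption about the helper used above): linear remap between ranges.
    scale = (drange_out[1] - drange_out[0]) / (drange_in[1] - drange_in[0])
    bias = drange_out[0] - drange_in[0] * scale
    return img * scale + bias
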
Example #26
def transform_frame(frames):
    with jt.no_grad():
        frames = [jt.array(frame).float() for frame in frames]
        return frames, transform(jt.stack(frames, 0))
Example #27
def test(model, dataset='cocoVal', logger=print, benchmark=False):
    if dataset == 'OCHumanVal':
        ImageRoot = './data/OCHuman/images'
        AnnoFile = './data/OCHuman/annotations/ochuman_coco_format_val_range_0.00_1.00.json'
    elif dataset == 'OCHumanTest':
        ImageRoot = './data/OCHuman/images'
        AnnoFile = './data/OCHuman/annotations/ochuman_coco_format_test_range_0.00_1.00.json'
    elif dataset == 'cocoVal':
        ImageRoot = './data/coco2017/val2017'
        AnnoFile = './data/coco2017/annotations/person_keypoints_val2017_pose2seg.json'
    datainfos = COCOTEST(ImageRoot,
                         AnnoFile,
                         onlyperson=True,
                         loadimg=True,
                         is_test=True)
    datainfos.batch_size = 1
    datainfos.num_workers = 1
    datainfos.collate_batch = collate_batch
    data_len = len(datainfos)
    #data_len = 1

    model.eval()

    results_segm = []
    imgIds = []
    start_time = time.time()
    outputs = []

    # jt.profiler.start(0, 0)

    # for i in tqdm(range(data_len)):
    for i, batch in tqdm(enumerate(datainfos)):
        #datainfos.display_worker_status()
        #if i>100:break
        # rawdata = datainfos[i]
        rawdata = batch[0]
        img = rawdata['data']
        image_id = rawdata['id']

        # height, width = img.shape[0:2]
        # gt_kpts = np.float32(rawdata['gt_keypoints']).transpose(0, 2, 1) # (N, 17, 3)
        # gt_segms = rawdata['segms']
        # gt_masks = np.array([annToMask(segm, height, width) for segm in gt_segms])
        gt_kpts = rawdata['gt_kpts']
        gt_masks = rawdata['gt_masks']
        with jt.no_grad():
            output = model([img], [gt_kpts], [gt_masks], rawdata['test_input'])
        imgIds.append(image_id)
        #jt.display_memory_info()

        if benchmark: continue
        #outputs.append(output)
        for mask in output[0]:
            #print(np.sum(mask))
            maskencode = maskUtils.encode(np.asfortranarray(mask))
            maskencode['counts'] = maskencode['counts'].decode('ascii')
            results_segm.append({
                "image_id": image_id,
                "category_id": 1,
                "score": 1.0,
                "segmentation": maskencode
            })
    jt.sync_all(True)

    # jt.profiler.stop()
    # jt.profiler.report()
    '''
    for output,image_id in zip(outputs,imgIds):
        for mask in output[0]:
            #print(np.sum(mask))
            maskencode = maskUtils.encode(np.asfortranarray(mask))
            maskencode['counts'] = maskencode['counts'].decode('ascii')
            results_segm.append({
                    "image_id": image_id,
                    "category_id": 1,
                    "score": 1.0,
                    "segmentation": maskencode
                })
    '''
    # print(len(results_segm))
    end_time = time.time()
    print('fps', data_len / (end_time - start_time))

    if benchmark: return

    def do_eval_coco(image_ids, coco, results, flag):
        from pycocotools.cocoeval import COCOeval
        assert flag in ['bbox', 'segm', 'keypoints']
        # Evaluate
        coco_results = coco.loadRes(results)
        cocoEval = COCOeval(coco, coco_results, flag)
        cocoEval.params.imgIds = image_ids
        cocoEval.params.catIds = [1]
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        return cocoEval

    cocoEval = do_eval_coco(imgIds, datainfos.COCO, results_segm, 'segm')
    logger('[POSE2SEG]          AP|.5|.75| S| M| L|    AR|.5|.75| S| M| L|')
    _str = '[segm_score] %s ' % dataset
    for value in cocoEval.stats.tolist():
        _str += '%.3f ' % value
    logger(_str)
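
A hypothetical invocation, based only on the signature above; the model construction is a placeholder:

# Hypothetical usage sketch (placeholder names, not from this repository).
model = build_pose2seg_model()              # placeholder: any module compatible with the call inside test()
model.load('pose2seg_last.pkl')             # assumption: weights saved in Jittor's pkl format
test(model, dataset='OCHumanVal', logger=print)   # full COCO-style segm evaluation
test(model, dataset='cocoVal', benchmark=True)    # benchmark=True reports throughput (fps) only
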