def validate(args,
             net,
             val_data_loader,
             val_dataset,
             iteration_num,
             iou_thresh=0.5):
    """Test a SSD network on an image database."""
    print('Validating at ', iteration_num)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    val_step = 100
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()

    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, targets, img_indexs = next(batch_iterator)
        batch_size = images.size(0)
        height, width = images.size(2), images.size(3)

        if args.cuda:
            images = images.cuda()
        with torch.no_grad():  # inference only; replaces the deprecated Variable(..., volatile=True)
            output = net(images)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   args.cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(
                    args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask]
                if scores.numel() == 0:  # no detections above threshold
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh,
                                  args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=True)

                det_boxes[cl_ind - 1].append(cls_dets)
            count += 1
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS time {:0.3f}'.format(te - tf))
    print('Evaluating detections for iteration number ', iteration_num)
    return evaluate_detections(gt_boxes,
                               det_boxes,
                               CLASSES,
                               iou_thresh=iou_thresh)
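
# --- Hedged usage sketch (not part of the original source) ---
# How validate() might be driven, assuming an argparse-style `args` carrying
# the fields read above (num_classes, cuda, cfg, conf_thresh, nms_thresh,
# topk) and the same detection_collate used at training time:
#
#     val_loader = data.DataLoader(val_dataset, args.batch_size,
#                                  num_workers=args.num_workers, shuffle=False,
#                                  collate_fn=detection_collate, pin_memory=True)
#     net.eval()
#     result = validate(args, net, val_loader, val_dataset,
#                       iteration_num=10000, iou_thresh=0.5)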

# ---- Example 2 ----

def test_net(net,
             save_root,
             exp_name,
             input_type,
             dataset,
             iteration,
             num_classes,
             thresh=0.5):
    """ Test a SSD network on an Action image database. """

    val_data_loader = data.DataLoader(dataset,
                                      args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    image_ids = dataset.ids
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-' + str(
        iteration).zfill(6) + '.pkl'
    print('Number of images ', len(dataset), ' number of batches', num_batches)
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be stored in ', frame_save_dir, '\n\n')
    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, targets, img_indexs = next(batch_iterator)
        batch_size = images.size(0)
        height, width = images.size(2), images.size(3)

        if args.cuda:
            images = images.cuda()
        with torch.no_grad():  # inference only; replaces the deprecated Variable(..., volatile=True)
            output = net(images)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()
            index = img_indexs[b]
            annot_info = image_ids[index]

            frame_num = annot_info[1]
            video_id = annot_info[0]
            videoname = video_list[video_id]
            # output_dir = frame_save_dir+videoname
            # if not os.path.isdir(output_dir):
            #     os.makedirs(output_dir)
            #
            # output_file_name = output_dir+'/{:05d}.mat'.format(int(frame_num))
            # save_ids.append(output_file_name)
            # sio.savemat(output_file_name, mdict={'scores':conf_scores.cpu().numpy(),'loc':decoded_boxes.cpu().numpy()})

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(
                    args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask]
                if scores.numel() == 0:  # no detections above threshold
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh,
                                  args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=True)
                det_boxes[cl_ind - 1].append(cls_dets)

            count += 1
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS time {:0.3f}'.format(te - tf))
    print('Evaluating detections for iteration number ', iteration)

    # #Save detection after NMS along with GT
    # with open(det_file, 'wb') as f:
    #     pickle.dump([gt_boxes, det_boxes, save_ids], f, pickle.HIGHEST_PROTOCOL)

    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=thresh)
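
# --- Hedged refactor sketch (not part of the original source) ---
# The threshold + per-class NMS block above is repeated verbatim in every
# function in this file. A minimal standalone equivalent, assuming modern
# PyTorch with torchvision available; torchvision.ops.nms stands in for the
# custom nms(boxes, scores, nms_thresh, topk) used above.
import numpy as np
from torchvision.ops import nms as tv_nms


def per_class_detections(decoded_boxes, conf_scores, num_classes,
                         conf_thresh, nms_thresh, topk, width, height):
    """Return one [x1, y1, x2, y2, score] array per foreground class."""
    all_dets = []
    for cl_ind in range(1, num_classes):
        scores = conf_scores[:, cl_ind]
        keep = scores > conf_thresh  # drop low-confidence priors first
        if keep.sum() == 0:
            all_dets.append(np.empty((0, 5), dtype=np.float32))
            continue
        boxes, cls_scores = decoded_boxes[keep], scores[keep]
        ids = tv_nms(boxes, cls_scores, nms_thresh)[:topk]  # indices sorted by score
        boxes = boxes[ids].cpu().numpy()
        cls_scores = cls_scores[ids].cpu().numpy()
        # scale normalized boxes to pixels and clip to the image
        boxes[:, 0::2] = boxes[:, 0::2].clip(0, 1) * width   # x1, x2
        boxes[:, 1::2] = boxes[:, 1::2].clip(0, 1) * height  # y1, y2
        all_dets.append(np.hstack((boxes, cls_scores[:, None])).astype(np.float32))
    return all_dets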

# ---- Example 3 ----

def validate(args,
             net,
             val_data_loader,
             val_dataset,
             epoch,
             iou_thresh=0.5,
             num_gpu=1):
    """Test a SSD network on an image database."""
    print('Validating at ', epoch)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    val_step = 100
    count = 0
    net.eval()  # switch net to evaluation mode
    torch.cuda.synchronize()
    ts = time.perf_counter()

    # create batch iterator

    batch_iterator = [[] for i in range(num_gpu)]
    max_x_y = 0
    min_x_y = []
    for i in range(num_gpu):
        batch_iterator[i] = iter(val_data_loader[i])
        min_x_y.append(len(val_data_loader[i]))
        max_x_y = max(max_x_y, len(val_data_loader[i]))
        # print("len: ", len(train_data_loader[i]))

    for val_itr in range(max_x_y):
        for ii in range(num_gpu):
            if val_itr >= min_x_y[ii]:
                batch_iterator[ii] = iter(val_data_loader[ii])

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        img_indexs = []
        images, targets, img_in = next(batch_iterator[0])
        img_indexs.append(img_in)

        img = torch.zeros([1, 3, 300, 300])
        images = torch.cat((images, img.type_as(images)), 0)

        for ii in range(num_gpu - 1):
            img, targ, img_in = next(batch_iterator[ii + 1])
            images = torch.cat((images, img), 0)
            img = (torch.ones([1, 3, 300, 300]) + ii)
            images = torch.cat((images, img.type_as(images)), 0)
            for iii in range(len(targ)):
                targets.append(targ[iii])

            img_indexs.append(img_in)

        batch_size = images.size(0) - num_gpu
        height, width = images.size(2), images.size(3)

        if args.cuda:
            images = images.cuda()

        with torch.no_grad():  # inference only; replaces the deprecated Variable(..., volatile=True)
            output = net(images, img_indexs)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]
        prior_data = prior_data[:loc_data.size(1), :]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   args.cfg['variance']).clone()
            conf_scores = net.module.softmax(conf_preds[b]).data.clone()

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(
                    args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask]
                if scores.numel() == 0:  # no detections above threshold
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh,
                                  args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=True)

                det_boxes[cl_ind - 1].append(cls_dets)
            count += 1
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS time {:0.3f}'.format(te - tf))
    print('Evaluating detections for epoch number ', epoch)
    return evaluate_detections(gt_boxes,
                               det_boxes,
                               CLASSES,
                               iou_thresh=iou_thresh)
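
# --- Hedged reference sketch (not part of the original source) ---
# decode() above comes from the surrounding ssd.pytorch-style codebase and is
# not shown here. A minimal sketch of the standard SSD decoding it performs,
# assuming priors in (cx, cy, w, h) form and the usual cfg['variance'] of
# [0.1, 0.2]:
import torch


def decode_sketch(loc, priors, variances):
    """Undo the SSD offset encoding: predicted offsets -> (x1, y1, x2, y2)."""
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],  # center
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)   # size
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> top-left corner
    boxes[:, 2:] += boxes[:, :2]       # (w, h)   -> bottom-right corner
    return boxes


# ---- Example 4 ----
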
def test_net(net, save_root, exp_name, input_type, dataset, iteration,
             li_color_class, means_bgr, n_record_per_class, th_iou):
    """ Test a SSD network on an Action image database. """
    #li_color_class = make_class_color_list(num_classes)
    shall_record = n_record_per_class > 0
    th_conf = args.conf_thresh
    th_nms = args.nms_thresh
    top_k = args.topk
    means_rgb = np.flipud(np.asarray(means_bgr))  # reverse channel order: BGR -> RGB

    val_data_loader = data.DataLoader(dataset,
                                      args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=True,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    image_ids = dataset.ids
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-' + input_type + '_' + str(
        iteration).zfill(6) + '.pkl'
    print('det_file : ', det_file)
    print('Number of images ', len(dataset), ' number of batches', num_batches)
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be stored in ', frame_save_dir, '\n\n')
    if shall_record:
        di_class_num_processed = {}
        fn_record = 'action_recognition_images_conf_thres_{:.2f}_nms_thres_{:.1f}_fpc_{}.avi'.format(
            th_conf, th_nms, n_record_per_class)
        writer = make_video_recorder(fn_record, (300, 300), 20)
    shall_stop = False
    for val_itr in range(len(val_data_loader)):
        print('\nval_itr : {} / {}'.format(val_itr, len(val_data_loader)))
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)
        torch.cuda.synchronize()
        t1 = time.perf_counter()
        images_rgb, targets, img_indexs = next(batch_iterator)
        batch_size = images_rgb.size(0)
        if shall_record:
            skip_this_batch = False
            for b in range(batch_size):
                img_idx = img_indexs[b]
                annot_info = dataset.ids[img_idx]
                video_id = annot_info[0]
                video_name = dataset.video_list[video_id].split("/")[0]
                if video_name in di_class_num_processed:
                    if di_class_num_processed[video_name] > n_record_per_class:
                        skip_this_batch = True
                        break
                    di_class_num_processed[video_name] += 1
                else:
                    di_class_num_processed[video_name] = 1
            if skip_this_batch:
                continue

        height, width = images_rgb.size(2), images_rgb.size(3)
        li_margin_ratio_l_r_t_b = [0, 0, 0, 0]
        if args.cuda:
            images_rgb = images_rgb.cuda()

        # ---- network forward pass ----
        with torch.no_grad():  # inference only; replaces the deprecated Variable(..., volatile=True)
            output = net(images_rgb)
        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))

        #   for each image in this batch
        for b in range(batch_size):
            img_idx = img_indexs[b]
            annot_info = dataset.ids[img_idx]
            video_id = annot_info[0]
            frame_num = annot_info[1]
            video_name = dataset.video_list[video_id]
            video_class = video_name.split("/")[0]
            img_name = dataset._imgpath + '/{:s}/{:05d}.jpg'.format(
                video_name, frame_num)
            print('img_name : ', img_name)

            # CHW tensor -> HWC numpy image
            t1_rgb = np.transpose(images_rgb[b].cpu().numpy(), (1, 2, 0))

            t2_rgb = t1_rgb + means_rgb
            t3_bgr = cv2.cvtColor(t2_rgb.astype(np.uint8), cv2.COLOR_RGB2BGR)
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            id_vid = dataset.CLASSES.index(video_class)
            cv2.putText(t3_bgr, video_class,
                        (X_OFFSET_GT_VID, Y_OFFSET_GT_VID),
                        cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE_GT_VID,
                        li_color_class[id_vid])

            cv2.putText(t3_bgr, "conf. thres. : {:.2f}".format(th_conf),
                        (int(width * 0.5 - 85), int(height - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255))

            if not shall_record:
                t3_bgr = mark_ground_truth(t3_bgr, gt, dataset.CLASSES,
                                           li_color_class)
            gt_boxes.append(gt)

            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()

            t3_bgr, det_boxes = mark_detections(t3_bgr, conf_scores,
                                                dataset.CLASSES, decoded_boxes,
                                                (width, height),
                                                li_margin_ratio_l_r_t_b,
                                                li_color_class, top_k, th_conf,
                                                th_nms, det_boxes)

            annot_info = image_ids[img_idx]

            frame_num = annot_info[1]
            video_id = annot_info[0]
            videoname = video_list[video_id]
            output_dir = frame_save_dir + videoname
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            output_file_name = output_dir + '/{:05d}.mat'.format(
                int(frame_num))
            save_ids.append(output_file_name)
            sio.savemat(output_file_name,
                        mdict={
                            'scores': conf_scores.cpu().numpy(),
                            'loc': decoded_boxes.cpu().numpy()
                        })
            if shall_record:
                writer.write(t3_bgr)
            count += 1
            cv2.imshow('t3_bgr', t3_bgr)
            k = cv2.waitKey() & 0xFF  # block until a key is pressed
            if k == 27:  # ESC stops the evaluation early
                shall_stop = True
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS time {:0.3f}'.format(te - tf))
        if shall_stop:
            break
    print('Evaluating detections for iteration number ', iteration)

    #Save detection after NMS along with GT
    with open(det_file, 'wb') as f:
        pickle.dump([gt_boxes, det_boxes, save_ids], f,
                    pickle.HIGHEST_PROTOCOL)
    if shall_record:
        writer.release()
        convert_vid_2_animated_gif(fn_record)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=th_iou)
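
# --- Hedged design note (not part of the original source) ---
# Every function here clips boxes to the image with a per-box Python loop;
# with the boxes already in an (N, 4) NumPy array of (x1, y1, x2, y2) pixel
# coordinates, the same clipping vectorizes to:
#
#     boxes[:, 0::2] = boxes[:, 0::2].clip(0, width)   # x1, x2
#     boxes[:, 1::2] = boxes[:, 1::2].clip(0, height)  # y1, y2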

# ---- Example 5 ----

def test_net(net, priors, args, dataset, iteration, thresh=0.5):
    """ Test an SSD network on an Action image database. """
    print('Testing an SSD network on an Action image database')
    val_data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers,
                            shuffle=False, collate_fn=detection_collate, pin_memory=True)
    print('Done making val dataset')
    image_ids = dataset.ids
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES[args.dataset]))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    frame_save_dir = '{}detections/{:s}-eg{:02d}/'.format(args.save_root, args.exp_name, args.eval_gap)
    softmax = nn.Softmax(dim=2).cuda()
    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, ground_truths, _ , _, num_mt, img_indexs = next(batch_iterator)
        
        batch_size = images[0].size(0)
        #images = images.permute(1, 0, 2, 3, 4)
        height, width = images[0].size(3), images[0].size(4)

        images = [img.cuda(0, non_blocking=True) for img in images if not isinstance(img, list)]
        conf_preds, loc_data = net(images)

        conf_scores_all = softmax(conf_preds).clone()

        if print_time and val_itr%val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            inds = np.asarray([m * args.seq_len for m in range(num_mt[b])])
            gt = ground_truths[b].numpy()
            gt = gt[inds]
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            bloc_data = loc_data[b]
            decoded_boxes = decode_seq(bloc_data, priors, args.cfg['variance'], args.seq_len)
            decoded_boxes = decoded_boxes.cpu()
            conf_scores = conf_scores_all[b].cpu().clone()
            index = img_indexs[b]
            annot_info = image_ids[index]

            frame_num = annot_info[1][0] + 1
            video_id = annot_info[0]
            videoname = video_list[video_id]
            output_dir = frame_save_dir+videoname
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            output_file_name_tmp = output_dir + '/{:06d}.mat'.format(int(frame_num))
            decoded_boxes_tmp = decoded_boxes.numpy()
            sio.savemat(output_file_name_tmp,
                    mdict={'scores': conf_scores.numpy(), 'loc': decoded_boxes_tmp})

            decoded_boxes = decoded_boxes[:, :4].clone()
            
            for cl_ind in range(1, args.num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask]
                if scores.numel() == 0:  # no detections above threshold
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].numpy()
                boxes = boxes[ids[:counts]].numpy()
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                det_boxes[cl_ind - 1].append(cls_dets)

            count += 1
        if val_itr%val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr%val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS time {:0.3f}'.format(te - tf))
    print('Evaluating detections for iteration number ', iteration)

    #Save detection after NMS along with GT
    # with open(det_file, 'wb') as f:
    #     pickle.dump([gt_boxes, det_boxes, save_ids], f, pickle.HIGHEST_PROTOCOL)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES[args.dataset], iou_thresh=thresh)

# ---- Example 6 ----

def validate(args, net, priors, val_data_loader, val_dataset, iteration_num, iou_thresh=0.5):
    """Test a SSD network on an image database."""
    print('Validating at ', iteration_num)
    num_images = len(val_dataset)
    num_classes = args.num_classes
    priors = priors.cuda()
    det_boxes = [[] for _ in range(len(CLASSES[args.dataset]))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    val_step = 100
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    softmax = nn.Softmax(dim=2).cuda()
    with torch.no_grad():
        for val_itr in range(len(val_data_loader)):
            if not batch_iterator:
                batch_iterator = iter(val_data_loader)

            torch.cuda.synchronize()
            t1 = time.perf_counter()

            images, ground_truths, _ , _, num_mt, img_indexs = next(batch_iterator)
            batch_size = images[0].size(0)
            #images = images.permute(1, 0, 2, 3, 4)
            height, width = images[0].size(3), images[0].size(4)

            images = [img.cuda(0, non_blocking=True) for img in images if not isinstance(img, list)]

            conf_preds, loc_data = net(images)

            conf_scores_all = softmax(conf_preds).clone()
            

            if print_time and val_itr%val_step == 0:
                torch.cuda.synchronize()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf-t1))
            
            for b in range(batch_size):
                inds = np.asarray([m*args.seq_len for m in range(num_mt[b])])
                gt = ground_truths[b].numpy()
                gt = gt[inds]
                gt[:,0] *= width
                gt[:,2] *= width
                gt[:,1] *= height
                gt[:,3] *= height
                gt_boxes.append(gt)
                decoded_boxes = decode_seq(loc_data[b], priors, args.cfg['variance'], args.seq_len)
                decoded_boxes = decoded_boxes[:,:4].clone()
                conf_scores = conf_scores_all[b].clone()
                #Apply nms per class and obtain the results
                for cl_ind in range(1, num_classes):
                    scores = conf_scores[:, cl_ind].squeeze()
                    c_mask = scores.gt(args.conf_thresh)  # greater than minimum threshold
                    scores = scores[c_mask]
                    if scores.numel() == 0:  # no detections above threshold
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue
                    boxes = decoded_boxes.clone()
                    l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                    boxes = boxes[l_mask].view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes per class
                    ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)  # idsn - ids after nms
                    scores = scores[ids[:counts]].cpu().numpy()
                    boxes = boxes[ids[:counts]].cpu().numpy()
                    boxes[:,0] *= width
                    boxes[:,2] *= width
                    boxes[:,1] *= height
                    boxes[:,3] *= height

                    for ik in range(boxes.shape[0]):
                        boxes[ik, 0] = max(0, boxes[ik, 0])
                        boxes[ik, 2] = min(width, boxes[ik, 2])
                        boxes[ik, 1] = max(0, boxes[ik, 1])
                        boxes[ik, 3] = min(height, boxes[ik, 3])
                    cls_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                    det_boxes[cl_ind-1].append(cls_dets)
                count += 1

            if val_itr%val_step == 0:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(count, num_images, te-ts))
                torch.cuda.synchronize()
                ts = time.perf_counter()
            if print_time and val_itr%val_step == 0:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('NMS time {:0.3f}'.format(te - tf))
    print('Evaluating detections for iteration number ', iteration_num)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES[args.dataset], iou_thresh=iou_thresh)
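
# --- Hedged reference sketch (not part of the original source) ---
# evaluate_detections() is not shown here; it matches detections to ground
# truth at the given iou_thresh. A minimal IoU helper of the kind such an
# evaluator depends on, for boxes in (x1, y1, x2, y2) form:
def iou_sketch(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])  # intersection top-left
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])  # intersection bottom-right
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    if inter == 0.0:
        return 0.0
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)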