Пример #1
0
def read_files(dtype):
    """Load every ``features-*-<dtype>.csv`` file found in ``../out``.

    The directory listing is sorted, each matching file name is passed to
    ``read_file`` and each result is unpacked as a pair.

    Returns a tuple ``(names, X)``: ``names`` holds the first element of
    every ``read_file`` result and ``X`` is the concatenation, along
    axis 1, of the second elements.
    """
    tic()
    suffix = '-%s.csv' % dtype
    selected = []
    for entry in sorted(os.listdir('../out')):
        if entry.startswith('features-') and entry.endswith(suffix):
            selected.append(entry)
    loaded = [read_file(entry) for entry in selected]
    # Transpose [(names_0, X_0), (names_1, X_1), ...] into two tuples.
    names, X = zip(*loaded)
    toc('read %s data' % dtype)
    return names, np.concatenate(X, 1)
Пример #2
0
def extract(info_filename, pairs_filename, mode):
    """Regenerate the per-module feature CSVs for one data split.

    Parameters
    ----------
    info_filename : str
        Name of the item-info CSV inside ``../data``.
    pairs_filename : str
        Name of the item-pairs CSV inside ``../data``.
    mode : str
        Split name.  ``'train'`` reads pair columns (0, 1); any other value
        reads columns (1, 2).  It is also used as the suffix of the output
        files ``../out/features-<module>-<mode>.csv``.

    Every ``features/extract-*.py`` module is handed to ``sync_extract``.
    Afterwards, feature CSVs of this split that no longer correspond to a
    module are deleted.
    """
    info_filename = os.path.join('../data', info_filename)
    pairs_filename = os.path.join('../data', pairs_filename)

    tic()
    info_df = pd.read_csv(
        info_filename,
        dtype={'itemID': int, 'categoryID': int, 'price': float},
        usecols=(0, 1, 6, 7, 8, 9, 10), index_col=0)
    info_df['line'] = np.arange(len(info_df), dtype=int)
    toc('info file')

    info_reader = MyCSVReader(info_filename)
    toc('info reader')

    cols = (0, 1) if mode == 'train' else (1, 2)
    pairs = np.genfromtxt(pairs_filename, int, delimiter=',', skip_header=1,
                          usecols=cols)
    # genfromtxt returns a 1-D array when the file has a single data row;
    # force the (n_pairs, 2) layout the indexing below relies on.
    pairs = pairs.reshape(-1, 2)
    toc('pairs file')

    # Translate item IDs into row numbers of the CSV file / info matrix.
    # (.loc is the label-based replacement for the deprecated .ix indexer.)
    a = info_df.loc[pairs[:, 0], 'line']
    b = info_df.loc[pairs[:, 1], 'line']
    pairs_lines = np.c_[a, b]
    toc('pairs lines')

    params = (info_filename, info_reader, info_df, pairs_lines)
    # Only real source files count as modules: without the '.py' check a
    # stray 'extract-x.pyc' would be turned into the bogus name 'extract-x.'.
    modules = [os.path.splitext(entry)[0]
               for entry in sorted(os.listdir('features'))
               if entry.startswith('extract-') and entry.endswith('.py')]
    # Built with os.path.join so the paths compare equal to the vestige
    # paths below on every platform.  module[8:] strips the 'extract-' prefix.
    csvs = [os.path.join('../out', 'features-%s-%s.csv' % (module[8:], mode))
            for module in modules]

    # create features from modules that have been created or changed
    for module, csv_path in zip(modules, csvs):
        sync_extract(module, csv_path, params)

    # remove whatever has been created by modules that no longer exist
    expected = set(csvs)
    vestiges = [os.path.join('../out', f) for f in os.listdir('../out')
                if f.startswith('features-') and f.endswith('-%s.csv' % mode)]
    for v in vestiges:
        if v not in expected:
            print('removing old %s...' % v)
            os.remove(v)
Пример #3
0
def sync_extract(module, csv, params):
    """Rebuild one feature CSV if it is missing or older than its module.

    ``module`` is the name of a module under ``features`` (its source is
    expected at ``features/<module>.py``), ``csv`` is the output path and
    ``params`` is the argument tuple forwarded to the module's ``fn``.

    ``fn`` must return ``(X, names)``.  When ``X`` is non-empty its items
    are concatenated along axis 1 (1-D items are first turned into
    columns) and written to ``csv`` with ``names`` as the header line.
    Nothing is written when ``X`` is empty.
    """
    if os.path.exists(csv):
        csv_mtime = os.path.getmtime(csv)
        source_mtime = os.path.getmtime('features/' + module + '.py')
        if source_mtime <= csv_mtime:
            # Output is at least as recent as the module source.
            return

    tic()
    extractor = importlib.import_module('features.' + module)
    X, names = extractor.fn(*params)
    toc(module[8:])  # label the timing without the 'extract-' prefix

    if not len(X):
        return
    if len(X[0].shape) == 1:
        X = [column[:, np.newaxis] for column in X]
    X = np.concatenate(X, 1)
    assert X.shape[1] == len(names)

    if X.dtype == int:
        fmt = '%d'
    else:
        fmt = '%.6f'
    np.savetxt(csv, X, fmt, delimiter=',', header=','.join(names),
               comments='')
Пример #4
0
def main():
    """Run the FPN detector over every ``.mp4`` file in the input directory.

    For each video the first frame is used to size the predictor.  Every
    frame is then run through ``im_detect``, filtered per class with NMS and
    a 0.65 score threshold, drawn with ``show_boxes`` and appended to
    ``<name>_out.mp4``.  The outputs returned by ``show_boxes`` are prefixed
    with the frame index and pickled to ``<name>_detect.pkl``.

    Reads the module-level ``config``, ``args`` and ``cur_path``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn" if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    # NOTE: eval() resolves "<module>.<class>" from the symbol name assigned
    # on the line above; it must never be fed an externally supplied string.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # find all videos
    video_path = "../../tmp"
    video_files = sorted([x for x in os.listdir(video_path) if x.endswith(".mp4")])
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # Everything below is identical for all videos, so set it up once
    # instead of reloading the model parameters for every file.
    data_names = ['data', 'im_info']
    label_names = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    arg_params, aux_params = load_param(cur_path + '/../model/demo_model/' + ('fpn_dcn_coco' if not args.rfcn_only else 'fpn_coco'), 0, process=True)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')

        # The first frame only serves to tell the predictor the input shapes.
        data = []
        for im in vid:
            im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
            break

        # get predictor
        data = [[mx.nd.array(entry[name]) for name in data_names] for entry in data]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, entry)] for entry in data]
        provide_label = [None for _ in data]

        print("hhhhh")
        print(provide_data, provide_label)
        print("hhhhh")

        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)

        print("successfully load model")

        vout = []
        # write to video
        writer = skvideo.io.FFmpegWriter(os.path.join(save_path, vf.replace(".mp4", "_out.mp4")), outputdict={'-vcodec': 'libx264', '-b': '300000000'})
        try:
            for frame_idx, im in enumerate(vid):
                im_original = im.copy()

                im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

                frame_data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
                # index is the current frame number (the original passed a
                # stale counter left over from the first-frame loop).
                data_batch = mx.io.DataBatch(data=[frame_data], label=[], pad=0, index=frame_idx,
                                             provide_data=[[(k, v.shape) for k, v in zip(data_names, frame_data)]],
                                             provide_label=[None])

                # im_info[0, 2] holds the scale stored above.
                scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]

                tic()
                scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
                boxes = boxes[0].astype('f')
                scores = scores[0].astype('f')
                dets_nms = []
                num_dets = 0
                # Column 0 of scores is skipped; one detection array per remaining column.
                for j in range(1, scores.shape[1]):
                    cls_scores = scores[:, j, np.newaxis]
                    cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    keep = nms(cls_dets)
                    cls_dets = cls_dets[keep, :]
                    cls_dets = cls_dets[cls_dets[:, -1] > 0.65, :]
                    dets_nms.append(cls_dets)
                    num_dets += cls_dets.shape[0]

                print('testing {} the {} th frame at {:.4f}s, detections {}'.format(vf, frame_idx, toc(), num_dets))
                # save results
                save_im, outputs = show_boxes(im_original, dets_nms, classes, 1, False)
                writer.writeFrame(save_im)

                for out in outputs:
                    vout.append([frame_idx] + out)
        finally:
            # Release the encoder and decoder even if a frame fails.
            writer.close()
            vid.close()

        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", "_detect.pkl")), "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)

    pbar.close()
    print('done')
Пример #5
0
# -*- coding: utf-8 -*-

import sys
sys.dont_write_bytecode = True
import os
from utils.tictoc import tic, toc
import pickle
import numpy as np
import pandas as pd
from scipy import stats

# Load the item-info table, indexed by its first column and restricted to
# columns 0, 1 and 6-10 of the CSV.
print 'load items info...'
tic()
Xinfo = pd.read_csv('../../data/ItemInfo_train.csv', index_col=0,
                    usecols=[0, 1, 6, 7, 8, 9, 10])
toc()
# Load the first three columns of the item-pairs table.
print 'load items pairs...'
tic()
Xpair = pd.read_csv('../../data/ItemPairs_train.csv', usecols=[0, 1, 2])
toc()

# idxmap is an efficient mapping between item-id and row index
# we could also use Xinfo.ix[indices], but this approach seems
# slightly faster
tic()
print 'load items mapping...'
# Reuse the cached mapping from the working directory when it exists.
if os.path.exists('idxmap.pickle'):
    with open('idxmap.pickle', 'rb') as f:
        idxmap = pickle.load(f)
else:
    # No cache: start from the last index label of the info table.
    # NOTE(review): the rest of this branch is not visible in this excerpt.
    lastid = Xinfo.index[-1]
Пример #6
0
def main():
    """Run the DETRAC FPN detector over every ``.mp4`` in ``../../tmp``.

    One sample image from the track-1 image directory is used to size the
    predictor.  Every frame of every video is then run through
    ``im_detect``, filtered per class with NMS and a 0.7 score threshold and
    passed to ``show_boxes``.  The outputs returned by ``show_boxes`` are
    prefixed with the frame index and pickled to
    ``../../tmp/output/<name>.pkl``.

    Reads the module-level ``config`` and ``cur_path``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    # NOTE: eval() resolves "<module>.<class>" from the constant assigned
    # on the line above; it must never be fed an externally supplied string.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    classes = ["car", "bus", "van", "others"]

    # load one demo image; it is only used to size the predictor
    im_path = '../../aic2018/track1/images/'
    image_names = [
        x for x in os.listdir(im_path)
        if x.endswith(".jpg") and x.startswith("9_1")
        and not x.endswith("_bbox.jpg")
    ]
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names[:1]:
        assert os.path.exists(im_path + im_name), (
            '{} does not exist'.format(im_path + im_name))
        im = cv2.imread(im_path + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    print(data)

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, entry)]
                    for entry in data]
    provide_label = [None for _ in data]
    # load parameters
    arg_params, aux_params = load_param(cur_path + '/../model/' + 'fpn_detrac',
                                        1,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    print("successfully load model")

    # find all videos
    video_path = "../../tmp"
    video_files = [x for x in os.listdir(video_path) if x.endswith(".mp4")]
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        vout = []
        for frame_idx, im in enumerate(vid):
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            frame_data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
            # index is the current frame number (the original passed the
            # leftover counter of the sample-image loop).
            data_batch = mx.io.DataBatch(
                data=[frame_data],
                label=[],
                pad=0,
                index=frame_idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, frame_data)]],
                provide_label=[None])

            # im_info[0, 2] holds the scale stored above.
            scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]

            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            num_dets = 0
            # Column 0 of scores is skipped; one detection array per remaining column.
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[:, 4:8]
                else:
                    cls_boxes = boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
                num_dets += cls_dets.shape[0]

            # Report the number of kept boxes, not the number of classes.
            print('testing {} the {} th frame at {:.4f}s, detections {}'.format(
                vf, frame_idx, toc(), num_dets))
            # save results
            # NOTE(review): `im` is the resized frame here - confirm that
            # show_boxes expects it rather than the original frame.
            save_im, outputs = show_boxes(im, dets_nms, classes, 1)

            for out in outputs:
                vout.append([frame_idx] + out)

        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", ".pkl")),
                  "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)

    pbar.close()
    print('done')
def main():
    """Segment people in a directory of images and write them out as JSON.

    Command line: ``indir`` (directory of ``.jpg``/``.png``/``.jpeg``
    images), ``outfile`` (path of the JSON file to write) and ``-d/--device``.

    All images are loaded and preprocessed up front.  The predictor is
    warmed up on the first image, then each image is run through
    ``im_detect``.  For every 'person' detection scoring above 0.7 with a
    non-empty box, a record is appended holding the image path, label,
    RLE-encoded mask, bounding box and score.  The records are dumped to
    ``outfile``.

    Raises IOError when an image cannot be decoded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("indir",
                        type=lambda s: unicode(s, 'utf8'),
                        help="Directory containing list of images")
    parser.add_argument("outfile",
                        type=lambda s: unicode(s, 'utf8'),
                        help="Path to write predictions")
    parser.add_argument("-d",
                        "--device",
                        type=int,
                        default=0,
                        help="Device ID to use")
    args = parser.parse_args()
    params = vars(args)

    # ---------------------------------------------------------- Read config
    # NOTE(review): ctx_id is computed from config.gpus before the --device
    # value is stored into config['gpus'] below, so --device does not change
    # ctx_id - confirm whether that is intended.
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    # NOTE: eval() on config.symbol - the config must come from a trusted file.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    config['gpus'] = str(params['device'])

    # ---------------------------------------------------------- Load Images
    image_path_list = []
    data = []
    scale_factor = 1.0
    img_dir = osp.abspath(params['indir'])
    det_thresh = 0.7

    # Load abs paths of images
    for f in sorted(os.listdir(img_dir)):
        _, f_ext = osp.splitext(f)
        if f_ext in ['.jpg', '.png', '.jpeg']:
            image_path_list.append(osp.join(img_dir, f))

    print('Loading {} images into memory...'.format(len(image_path_list)))

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    for image_path in image_path_list:
        im = cv2.imread(image_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if im is None:
            # cv2.imread signals failure by returning None.
            raise IOError('Could not read image %r' % (image_path,))
        height, width = im.shape[:2]
        im = cv2.resize(
            im, (int(scale_factor * width), int(scale_factor * height)))
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    print('Loaded {} images'.format(len(image_path_list)))

    # ---------------------------------------------------------- Predict
    predictions = []

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, entry)]
                    for entry in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(
        '/BS/orekondy2/work/opt/FCIS/model/fcis_coco', 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up: two throw-away passes over the first image
    for _ in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    scale = 1.0
    for idx, image_path in enumerate(image_path_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        # im_info[0, 2] holds the scale stored at load time.
        scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [batch[0].shape[2:4] for batch in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            # A config without CLASS_AGNOSTIC is treated as class-specific,
            # which is what the original try / bare-except fallback did.
            class_agnostic = getattr(config, 'CLASS_AGNOSTIC', False)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print('{} testing {} {:.4f}s'.format(idx, image_path, toc()))

        # Drop detections at or below the score threshold, per class.
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > det_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        # Decode the image once; its shape gives the output mask size.
        im = cv2.imread(image_path)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        org_height, org_width = im.shape[:2]

        # dets / masks hold one entry per class in `classes`; each dets
        # entry has rows of [x1 y1 x2 y2 score].  Only 'person' is exported.
        person_idx = classes.index('person')
        person_dets = dets[person_idx]
        person_masks = masks[person_idx]

        for det, msk in zip(person_dets, person_masks):
            inst_arr = np.zeros_like(im[:, :, 0])  # 2-D array, one value per pixel
            bbox = det[:4] * scale
            cod = bbox.astype(int)
            if im[cod[1]:cod[3], cod[0]:cod[2], 0].size > 0:
                msk = cv2.resize(
                    msk, im[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1, 0].T.shape)
                bimsk = (msk >= config.BINARY_THRESH).astype('uint8')

                # ------- Create bit-mask for this instance
                inst_arr[cod[1]:cod[3] + 1,
                         cod[0]:cod[2] + 1] = bimsk  # thresholded binary mask
                rs_inst_arr = scipy.misc.imresize(inst_arr,
                                                  (org_height, org_width))
                rle = mask.encode(np.asfortranarray(rs_inst_arr))

                predictions.append({
                    'image_path': image_path,
                    'label': 'person',
                    'segmentation': rle,
                    'bbox': bbox.tolist(),
                    # NumPy scalars such as float32 are not JSON-serializable.
                    'score': float(det[-1]),
                })

    print('Created {} predictions'.format(len(predictions)))

    # ---------------------------------------------------------- Write output
    with open(params['outfile'], 'wb') as wf:
        json.dump(predictions, wf, indent=2)
Пример #8
0
def main(video_file):
    """Run the R-FCN road-object detector on a video and display the result.

    Parameters
    ----------
    video_file : str
        Path of the video to open with ``cv2.VideoCapture``.

    Detection runs on every 8th frame; the frames in between are shown
    unmodified.  Processed frames are passed to ``draw_bbox_on_frame`` and
    shown in the 'video' window.  The loop ends when the video runs out of
    frames or 'q' is pressed after a processed frame.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    # NOTE: eval() resolves "<module>.<class>" from the constant assigned
    # on the line above; it must never be fed an externally supplied string.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param('./output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road', 19, process=True)

    # class names; the background is not listed even though it is treated as label '0'
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    data_names = ['data', 'im_info']
    label_names = []
    # NOTE(review): target_size reads SCALES[0][1], the same entry as
    # max_size - confirm that SCALES[0][0] was not intended.
    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    predictor = None  # built from the first processed frame, then reused

    # Open the file that was actually passed in.
    cap = cv2.VideoCapture(video_file)
    # based on testing, processing one frame takes around 0.25s, so
    # detection only runs on every 8th frame
    frame_interval = 8
    try:
        while cap.isOpened():
            frame_id = cap.get(1)  # property id 1: index of the next frame
            ret, frame = cap.read()
            if not ret:
                # No frame was returned: stop instead of passing None on.
                break
            if frame_id % frame_interval != 0:
                cv2.imshow('video', frame)
                continue
            tic()
            frame, im_scale = resize(frame, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            frame_data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
            provide_data = [[(k, v.shape) for k, v in zip(data_names, frame_data)]]

            # get predictor (once; every frame of a video has the same shape)
            if predictor is None:
                predictor = Predictor(sym, data_names, label_names,
                                      context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                      provide_data=provide_data, provide_label=[None],
                                      arg_params=arg_params, aux_params=aux_params)

            # Process video frame
            data_batch = mx.io.DataBatch(data=[frame_data], label=[], pad=0, index=0,
                                         provide_data=provide_data,
                                         provide_label=[None])
            # im_info[0, 2] holds the scale stored above.
            scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]

            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            # Column 0 of scores is skipped; one detection array per remaining column.
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            frame_with_bbox = draw_bbox_on_frame(frame, dets_nms, classes, scale=scales[0])
            cv2.imshow('video', frame_with_bbox)
            print('Processing frame {} in {:.4f}s'.format(frame_id, toc()))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
    print('done')
Пример #9
0
def main():
    """Run the Accel semantic-segmentation demo and print per-frame and final mIoU.

    Settings are read from the module-level ``args`` (``version``,
    ``interval``, ``num_ex``, ``avg_acc``).  From each of ``num_ex`` snippets
    of ``snip_len`` frames a window of ``interval`` consecutive frames is
    selected.  Frames whose index is a multiple of ``interval`` go through the
    key-frame predictor; all other frames go through the current-frame
    predictor, which is fed the feature returned by the previous call.
    Predictions are saved as palettised images under
    ``<cur_path>/../demo/deeplab_dff/`` and scored against the
    ``*trainIds.png`` label files whose name components match the frame.

    Raises:
        ValueError: if ``args.version`` is not one of 18/34/50/101, or if
            ``args.interval`` or ``args.num_ex`` is smaller than 1.
    """
    # settings
    num_classes = 19
    snip_len = 30
    version = str(args.version)
    interv = args.interval
    num_ex = args.num_ex
    avg_acc = args.avg_acc

    # validate params
    if version not in ['18', '34', '50', '101']:
        raise ValueError(
            "Invalid Accel version '%s' - must be one of Accel-{18,34,50,101}"
            % version)
    if interv < 1:
        raise ValueError("Invalid interval %d - must be >=1" % interv)
    if num_ex < 1:
        raise ValueError("Invalid num_ex %d - must be >=1" % num_ex)

    # get symbol
    pprint.pprint(config)
    config.symbol = 'accel_' + version
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/accel-' + version
    # NOTE: eval() only ever sees 'accel_<version>' with <version> validated
    # above, so no external text reaches it.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    path_demo_data = '/ebs/Accel/data/cityscapes/'
    path_demo_labels = '/ebs/Accel/data/cityscapes/'
    if path_demo_data == '' or path_demo_labels == '':
        raise ValueError("Must set path to demo data + labels")

    # load demo data
    image_names = sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/frankfurt/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/lindau/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/munster/*.png'))
    image_names = image_names[:snip_len * num_ex]
    label_files = sorted(
        glob.glob(path_demo_labels + 'gtFine/val/frankfurt/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/lindau/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/munster/*trainIds.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    # Pick, for every snippet, a window of `interv` frames positioned relative
    # to frame `lb_pos` of the snippet (presumably the annotated frame --
    # TODO confirm).  Without avg_acc the window ends on lb_pos; with avg_acc
    # the offset cycles through 0..interv-1 from snippet to snippet.
    lb_pos = 19
    image_names_trunc = []
    for i in range(num_ex):
        snip_pos = i * snip_len
        if avg_acc:
            offset = i % interv
        else:
            offset = interv - 1
        start_pos = lb_pos - offset
        image_names_trunc.extend(image_names[snip_pos + start_pos:snip_pos +
                                             start_pos + interv])
    image_names = image_names_trunc

    data = []
    prev_im_tensor = None
    for im_name in image_names:
        # The original message used '%s' with str.format(), which never
        # substituted the file name.
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        # The very first frame has no predecessor, so it is paired with itself.
        if prev_im_tensor is None:
            prev_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            # Stored for completeness; 'im_info' is not in data_names below,
            # so it is not fed to the predictors.
            'im_info': im_info,
            'data_key': prev_im_tensor,
            # Placeholder; replaced by the real feature for non-key frames.
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })
        prev_im_tensor = im_tensor

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(frame[name]) for name in data_names]
            for frame in data]
    max_height = max([v[0] for v in config.SCALES])
    max_width = max([v[1] for v in config.SCALES])
    max_data_shape = [[
        ('data', (1, 3, max_height, max_width)),
        ('data_key', (1, 3, max_height, max_width)),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, frame)]
                    for frame in data]
    provide_label = [None for _ in data]

    # Parameters of the second checkpoint override those of the first on
    # name clashes (dict.update semantics).
    arg_params, aux_params = load_param(cur_path + model1, 0, process=True)
    arg_params_dcn, aux_params_dcn = load_param(cur_path + model2,
                                                0,
                                                process=True)
    arg_params.update(arg_params_dcn)
    aux_params.update(aux_params_dcn)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    # Not used by the segmentation path below; kept in case
    # gpu_nms_wrapper has initialisation side effects -- TODO confirm.
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # Non-key frames read 'correction_output' except for Accel-101.
    cur_output_key = ('croped_score_output'
                      if version == '101' else 'correction_output')

    # warm up (bounded by len(data): a single selected frame used to raise
    # IndexError on data[1])
    for j in range(min(2, len(data))):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        if j % key_frame_interval == 0:
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output[cur_output_key], axis=1).asnumpy()
                for output in output_all
            ]

    print("warmup done")

    # test
    total_time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])

        tic()
        if idx % key_frame_interval == 0:
            print('\n\nframe {} (key)'.format(idx))
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            print('\nframe {} (intermediate)'.format(idx))
            # Feed the feature returned by the previous frame's call.
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output[cur_output_key], axis=1).asnumpy()
                for output in output_all
            ]

        elapsed = toc()
        total_time += elapsed
        count += 1
        print('testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed,
                                                    total_time / count))

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        # compute accuracy
        label = None

        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # An annotation is available for this frame when the 2nd and 3rd
        # underscore-separated name components agree with the next label file.
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print('label {}'.format(lb_filename))
            label = np.asarray(Image.open(label_files[lb_idx]))
            # Advance to the next label, but never past the last one.
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print('mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2)))
            print('(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2)))

    ious = per_class_iu(hist) * 100
    print(' '.join('{:.03f}'.format(i) for i in ious))
    print('===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2)))

    print('done')
Пример #10
0
def main():
    """Run the R-FCN detection demo on one ImageNet-VID validation clip.

    Loads every ``*.JPEG`` under ``<cur_path>/../demo/ILSVRC2015_val_00007010``,
    runs the detector on each image, keeps detections scoring above 0.7 after
    NMS, draws them and writes the result to ``<cur_path>/../demo/rfcn/``
    under the original file name.  The running average inference time is
    printed after every image.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    # NOTE: eval() is applied to a constant string only.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names (index 0 of the score matrix is skipped below)
    classes = ['airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        # The original message used '%s' with str.format(), which never
        # substituted the file name.
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)] for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: run the first image twice and discard the results
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        # Element [0, 2] of im_info (second entry of data_names) is im_scale.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
        im_detect(predictor, data_batch, data_names, scales, config)

    # test
    total_time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]

        tic()
        scores, boxes, _ = im_detect(predictor, data_batch, data_names, scales, config)
        total_time += toc()
        count += 1
        # Reports the running average, not the time of this single image.
        print('testing {} {:.4f}s'.format(im_name, total_time / count))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of the scores is skipped; each remaining column is one class.
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            # Keep only confident detections (score is the last column).
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
Пример #11
0
def process_image_fun(imagesPath=None,
                      fileOp=None,
                      vis=None,
                      model_params_list=None,
                      count=0):
    """Detect objects in a single image and pass the result to ``show_boxes_write_rg``.

    Args:
        imagesPath: handed unchanged to ``resize`` and later to
            ``show_boxes_write_rg(im=...)``.  Despite the name this is
            presumably an already-loaded image array -- confirm with callers.
        fileOp: forwarded to ``show_boxes_write_rg``.
        vis: forwarded to ``show_boxes_write_rg``.
        model_params_list: indexable holding the network symbol at [0], the
            arg params at [1] and the aux params at [2].
        count: forwarded to ``show_boxes_write_rg``.

    Returns:
        Whatever ``show_boxes_write_rg`` returns for the image.
    """
    # Class names and the lowest per-label threshold; detections below this
    # score are dropped after NMS.
    class_names = RFCN_DCN_CONFIG['num_classes_name_list']
    label_thresholds = RFCN_DCN_CONFIG['need_label_thresholds']
    score_floor = min(list(label_thresholds.values()))

    sources = [imagesPath]

    # Preprocess: rescale with the first configured scale, then build the
    # network tensor and its (height, width, scale) descriptor.
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(imagesPath,
                          target_size,
                          max_size,
                          stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    raw_inputs = [{'data': im_tensor, 'im_info': im_info}]

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in raw_inputs]
    largest_dim0 = max(v[0] for v in config.SCALES)
    largest_dim1 = max(v[1] for v in config.SCALES)
    max_data_shape = [[('data', (1, 3, largest_dim0, largest_dim1))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, arrays)]
                    for arrays in data]
    provide_label = [None] * len(data)

    predictor = Predictor(model_params_list[0],
                          data_names,
                          label_names,
                          context=[mx.gpu(1)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=model_params_list[1],
                          aux_params=model_params_list[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for position, source in enumerate(sources):
        arrays = data[position]
        data_batch = mx.io.DataBatch(
            data=[arrays],
            label=[],
            pad=0,
            index=position,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, arrays)]],
            provide_label=[None])
        # Element [0, 2] of im_info (second entry of data_names) is im_scale.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]

        tic()
        scores, boxes, _ = im_detect(predictor, data_batch, data_names,
                                     scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')

        # Per-class NMS followed by score filtering; column 0 is skipped.
        dets_nms = []
        for cls_idx in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_idx, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, cls_idx * 4:(cls_idx + 1) * 4]
            candidates = np.hstack((cls_boxes, cls_scores))
            survivors = candidates[nms(candidates), :]
            dets_nms.append(survivors[survivors[:, -1] > score_floor, :])

        im = show_boxes_write_rg(im=source,
                                 dets=dets_nms,
                                 classes=class_names,
                                 scale=1,
                                 vis=vis,
                                 fileOp=fileOp,
                                 count=count)
    return im
Пример #12
0
    def Seg(self):
        """Run instance segmentation on ``self.data`` and return the rendered mask.

        The predictor is warmed up with two throw-away passes, then run once
        more.  Detections are either filtered per class with NMS or merged by
        ``gpu_mask_voting``, depending on ``config.TEST.USE_MASK_MERGE``.  The
        result of ``show_masks`` is stored in ``self.result`` and returned.

        Reads ``self.data``, ``self.data_names``, ``self.predictor``,
        ``self.num_classes``, ``self.ctx_id``, ``self.fg`` and
        ``self.classes``.
        """

        def build_batch():
            # Wrap self.data in a one-item DataBatch and read element [0, 2]
            # of its second array (presumably im_info's scale -- confirm).
            batch = mx.io.DataBatch(
                data=[self.data],
                label=[],
                pad=0,
                index=0,
                provide_data=[[(k, v.shape)
                               for k, v in zip(self.data_names, self.data)]],
                provide_label=[None])
            batch_scales = [item[1].asnumpy()[0, 2] for item in batch.data]
            return batch, batch_scales

        # warm up: two passes whose outputs are discarded
        for _ in range(2):
            data_batch, scales = build_batch()
            im_detect(self.predictor, data_batch, self.data_names, scales,
                      config)

        data_batch, scales = build_batch()

        tic()
        scores, boxes, masks, _ = im_detect(self.predictor, data_batch,
                                            self.data_names, scales, config)
        im_shapes = [item[0].shape[2:4] for item in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(self.num_classes)]
            all_masks = [[] for _ in range(self.num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            # The original used `raise Exception()` inside a bare `except:`
            # to fall back to per-class boxes, which also hid any unrelated
            # error.  Only a missing CLASS_AGNOSTIC setting is tolerated now.
            try:
                class_agnostic = config.CLASS_AGNOSTIC
            except (AttributeError, KeyError):
                class_agnostic = False
            for j in range(1, self.num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            # Drop slot 0, which is never filled by the loop above.
            dets = all_boxes[1:]
            masks = all_masks[1:]
        else:
            masks = masks[0][:, 1:, :, :]
            # Undo the network scale to recover the original image size.
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], self.num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, self.ctx_id[0])

            dets = [result_dets[j] for j in range(1, self.num_classes)]
            masks = [
                result_masks[j][:, 0, :, :]
                for j in range(1, self.num_classes)
            ]

        # Every class except the first is filtered with a threshold of 1 on
        # the last column.  NOTE(review): if that column is a probability
        # this removes all of them -- presumably intentional, confirm.
        for cls in range(1, len(dets)):
            keep = np.where(dets[cls][:, -1] > 1)
            dets[cls] = dets[cls][keep]
            masks[cls] = masks[cls][keep]

        # The first class keeps detections whose last column exceeds 0.8.
        keep = np.where(dets[0][:, -1] > 0.8)
        dets[0] = dets[0][keep]
        masks[0] = masks[0][keep]

        # NOTE: the original author flagged this call as producing a
        # "wrong mask"; the cause is not visible from here.
        newmask = show_masks(self.fg, dets, masks, self.classes, config)
        self.result = newmask
        return newmask
Пример #13
0
def main():
    """Run the DeepLab (optionally deformable) segmentation demo on two images.

    The network variant is chosen by ``args.deeplab_only``.  Each demo image
    under ``<cur_path>/../demo/`` is segmented, the palettised prediction is
    saved next to it as ``seg_<name>.png``, and both the raw image and the
    result are shown in OpenCV windows until a key is pressed.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    # NOTE: eval() only ever sees one of the two constant names above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    demo_dir = cur_path + '/../demo/'

    # load demo data
    image_names = [
        'frankfurt_000001_073088_leftImg8bit.png',
        'lindau_000024_000019_leftImg8bit.png'
    ]
    data = []
    for im_name in image_names:
        # The original message used '%s' with str.format(), which never
        # substituted the file name.
        assert os.path.exists(demo_dir + im_name), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(demo_dir + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor (only 'data' is fed to the network; im_info is unused)
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(item[name]) for name in data_names]
            for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('deeplab_dcn_cityscapes'
         if not args.deeplab_only else 'deeplab_cityscapes'),
        0,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up: run the first image twice and discard the results
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [
            mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
            for output in output_all
        ]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])

        # The timed section covers inference, argmax and palette conversion.
        tic()
        output_all = predictor.predict(data_batch)
        output_all = [
            mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
            for output in output_all
        ]
        pallete = getpallete(256)

        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        pure_im_name = os.path.splitext(im_name)[0]
        seg_path = demo_dir + 'seg_' + pure_im_name + '.png'
        segmentation_result.save(seg_path)

        # visualize: re-read the saved result so OpenCV gets a colour image
        im_raw = cv2.imread(demo_dir + im_name)
        seg_res = cv2.imread(seg_path)
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print('done')
Пример #14
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'impression_network_dynamic_offset_sparse'
    model = '/../local_run_output/impression_dynamic_offset-lr-10000-times-neighbor-4-dense-4'
    first_sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym_instance = eval(config.symbol + '.' + config.symbol)()
    cur_sym_instance = eval(config.symbol + '.' + config.symbol)()

    first_sym = first_sym_instance.get_first_test_symbol_impression(config)
    key_sym = key_sym_instance.get_key_test_symbol_impression(config)
    cur_sym = cur_sym_instance.get_cur_test_symbol_impression(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00011005/*.JPEG')
    output_dir = cur_path + '/../demo/motion-prior-output-00011005/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10
    image_names.sort()
    data = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            if idx == 0:
                data_oldkey = im_tensor.copy()
                data_newkey = im_tensor.copy()
                data_cur = im_tensor.copy()
            else:
                data_oldkey = data_newkey.copy()
                data_newkey = im_tensor
        else:
            data_cur = im_tensor
        shape = im_tensor.shape
        infer_height = int(np.ceil(shape[2] / 16.0))
        infer_width = int(np.ceil(shape[3] / 16.0))
        data.append({
            'data_oldkey':
            data_oldkey,
            'data_newkey':
            data_newkey,
            'data_cur':
            data_cur,
            'im_info':
            im_info,
            'impression':
            np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width)),
            'key_feat_task':
            np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width))
        })

    # get predictor
    data_names = [
        'data_oldkey', 'data_cur', 'data_newkey', 'im_info', 'impression',
        'key_feat_task'
    ]
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data_oldkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_newkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_cur', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
        ('impression', (1, 1024, 38, 63)), ('key_feat_task', (1, 1024, 38, 63))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 4, process=True)
    first_predictor = Predictor(first_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][3].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:  # keyframe
            if j == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression',
                                                  impression.shape)
                scores, boxes, data_dict, conv_feat, impression, feat_task = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, _ = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
    print "warmup done"
    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][3].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        print(idx)
        if idx % key_frame_interval == 0:  # keyframe
            if idx == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
                feat_task_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx), feat_task_numpy)
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression',
                                                  impression.shape)

                scores, boxes, data_dict, conv_feat, impression, feat_task, _ = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
                feat_task_key_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx),
                        feat_task_key_numpy)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, feat_task_cur = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
            if idx >= 1:
                feat_task_cur_numpy = feat_task_cur.asnumpy()
                np.save("features/impression_%s.npy" % (idx),
                        feat_task_cur_numpy)
                #import pdb;pdb.set_trace()
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time / count)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
    print 'done'
Пример #15
0
def main():
    """Detect receipt fields in the listed test images, crop and OCR them.

    For every image name listed in ``ImageSets/image.txt`` this function:

    1. runs the detector once per entry of ``config.SCALES`` and pools the
       per-class detections of all scales,
    2. applies NMS (or soft-NMS when ``config.TEST.USE_SOFTNMS`` is set) and
       keeps detections scoring above ``CONFIDENCE_THRESHOLD``,
    3. draws the detections on the image, saves one crop per detection and
       runs pytesseract on the ``item_name``, ``price``, ``row`` and
       ``total_price`` crops,
    4. appends the recognised total and the item/price lines to the results
       text file and writes the detections through ``convertToXML``.

    All input and output locations are hard-coded below. The output
    directory is deleted and re-created on every run. The process exits with
    status -1 when a listed image cannot be found with any of
    ``IMAGE_EXTENSIONS``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    # eval() only ever sees the literal assigned on the line above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    max_per_image = config.TEST.max_per_image

    # Print the test scales
    # NOTE(review): TEST_SCALES is printed here, but the detection loop below
    # iterates config.SCALES - confirm which one is intended.
    print("Train scales: %s" % str(config.SCALES))
    print("Test scales: %s" % str(config.TEST_SCALES))

    # load demo data
    dataBaseDir = '/netscratch/queling/data/'
    outputBaseDir = '/netscratch/queling/Deformable/output/fpn/deep_receipt/results/' + EXPERIMENT_NAME
    resultsFilePath = "/netscratch/queling/Deformable/fpn/results.txt"

    # class name -> sub-directory (appended to outputBaseDir) for its crops
    cropSubDirs = {
        "price": "/price/",
        "item_name": "/item/",
        "row": "/row/",
        "total_price": "/total/",
        "header": "/header/",
    }
    # classes whose crop file names carry a running number
    numberedClasses = ("price", "item_name", "row")

    # Start from an empty output directory on every run.
    if os.path.exists(outputBaseDir):
        shutil.rmtree(outputBaseDir)
    os.makedirs(outputBaseDir)

    outputFile = open(os.path.join(outputBaseDir, 'output.txt'), 'w')
    outputFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    errorStatsFile = open(
        os.path.join(outputBaseDir, 'incorrect-detections.txt'), 'w')

    incorrectDetectionResultsPath = os.path.join(outputBaseDir,
                                                 'IncorrectDetections')
    detectionResultsPath = os.path.join(outputBaseDir, 'Detections')
    annotationResultsPath = os.path.join(outputBaseDir, 'Annotations')
    for resultsPath in (incorrectDetectionResultsPath, detectionResultsPath,
                        annotationResultsPath):
        if not os.path.exists(resultsPath):
            os.mkdir(resultsPath)

    # NOTE(review): these counters are initialised but never updated in this
    # function (the ground-truth comparison is disabled).
    statistics = {}
    for cls in CLASSES:
        statistics[cls] = {}
        for thresh in IoU_THRESHOLDS:
            statistics[cls][thresh] = {
                "truePositives": 0,
                "falsePositives": 0,
                "falseNegatives": 0,
                "precision": 0,
                "recall": 0,
                "fMeasure": 0,
            }

    # test.txt for whole dataset, image.txt for one
    with open(os.path.join(dataBaseDir, 'ImageSets/image.txt'),
              'r') as im_names_file:
        im_names = [line.strip() for line in im_names_file]

    for im_name in im_names:
        if not im_name:
            # a blank line is not an image name
            continue

        # Locate the image file: Images for whole dataset, Test for one
        im_path = None
        for ext in IMAGE_EXTENSIONS:
            candidate = os.path.join(dataBaseDir, 'Test', im_name + ext)
            if os.path.exists(candidate):
                im_name_with_ext = im_name + ext
                im_path = candidate
                break
        if im_path is None:
            print("Error: Unable to locate file %s" % (im_name))
            exit(-1)

        tic()

        # One slot per foreground class; filled with (N, 5) arrays of
        # [x1, y1, x2, y2, score] rows pooled over all scales.
        dets_nms = [[] for _ in range(len(TOTAL_CLASSES) - 1)]

        for testScale in config.SCALES:
            im = cv2.imread(im_path,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = testScale[0]
            max_size = testScale[1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            # get predictor
            data_names = ['data', 'im_info']
            label_names = []
            raw_inputs = {'data': im_tensor, 'im_info': im_info}
            data = [[mx.nd.array(raw_inputs[name]) for name in data_names]]
            max_data_shape = [[('data', (1, 3, testScale[0], testScale[1]))]]
            provide_data = [[(k, v.shape)
                             for k, v in zip(data_names, data[0])]]
            provide_label = [None]
            # NOTE(review): the parameters are reloaded and a predictor is
            # rebuilt for every image and scale; load_param looks
            # loop-invariant and could probably be hoisted - confirm that
            # Predictor does not modify the parameter dicts first.
            arg_params, aux_params = load_param(MODEL_PATH,
                                                MODEL_EPOCH,
                                                process=True)
            predictor = Predictor(sym,
                                  data_names,
                                  label_names,
                                  context=[mx.gpu(0)],
                                  max_data_shapes=max_data_shape,
                                  provide_data=provide_data,
                                  provide_label=provide_label,
                                  arg_params=arg_params,
                                  aux_params=aux_params)

            # Two warm-up passes followed by the pass whose output is kept;
            # all three run on the same single-image batch.
            for _ in range(3):
                data_batch = mx.io.DataBatch(
                    data=[data[0]],
                    label=[],
                    pad=0,
                    index=0,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data[0])]],
                    provide_label=[None])
                scales = [
                    batch_data[1].asnumpy()[0, 2]
                    for batch_data in data_batch.data
                ]
                scores, boxes, _data_dict = im_detect(predictor, data_batch,
                                                      data_names, scales,
                                                      config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')

            # Pool the detections of this scale with those of earlier scales.
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[:, 4:8]
                else:
                    cls_boxes = boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                if len(dets_nms[j - 1]) == 0:
                    dets_nms[j - 1] = cls_dets
                else:
                    # Stack rows: "+=" on arrays would add the coordinates
                    # element-wise instead of appending detections.
                    dets_nms[j - 1] = np.vstack((dets_nms[j - 1], cls_dets))

        for clsIter in range(len(dets_nms)):
            if len(dets_nms[clsIter]) == 0:
                # nothing collected for this class: keep a well-formed
                # empty array so the code below can index it
                dets_nms[clsIter] = np.zeros((0, 5), dtype=np.float32)
                continue
            if config.TEST.USE_SOFTNMS:
                soft_nms = py_softnms_wrapper(config.TEST.SOFTNMS_THRESH,
                                              max_dets=max_per_image)
                dets_nms[clsIter] = soft_nms(dets_nms[clsIter])
            else:
                nms = py_nms_wrapper(config.TEST.NMS)
                keep = nms(dets_nms[clsIter])
                dets_nms[clsIter] = dets_nms[clsIter][keep, :]
            dets_nms[clsIter] = dets_nms[clsIter][
                dets_nms[clsIter][:, -1] > CONFIDENCE_THRESHOLD, :]

        print('Processing image: {} {:.4f}s'.format(im_name, toc()))

        # Add detections on the image
        im = cv2.imread(
            im_path)  # Reload the image since the previous one was scaled

        numItems = 0
        numPrices = 0
        numRows = 0
        cropIndex = 0
        totalFound = False

        for cls_idx, cls_name in enumerate(CONCERNED_ERRORS):
            for det in dets_nms[cls_idx]:
                predictedBBox = det[:4]
                x1, y1 = int(predictedBBox[0]), int(predictedBBox[1])
                x2, y2 = int(predictedBBox[2]), int(predictedBBox[3])
                cv2.rectangle(im, (x1, y1), (x2, y2), (0, 0, 255), 1)
                w = predictedBBox[2] - predictedBBox[0]
                cv2.putText(im, cls_name,
                            (int(predictedBBox[0] +
                                 (w / 2.0) - 100), int(predictedBBox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0), 1)

                # NOTE(review): the crop is taken from the annotated image,
                # so it contains the box/label pixels drawn so far.
                crop_im = im[y1:y2, x1:x2]
                gray = cv2.cvtColor(crop_im, cv2.COLOR_BGR2GRAY)

                if cls_name in cropSubDirs:
                    if cls_name == "price":
                        numPrices += 1
                        cropIndex = numPrices
                    elif cls_name == "item_name":
                        numItems += 1
                        cropIndex = numItems
                    elif cls_name == "row":
                        numRows += 1
                        cropIndex = numRows
                    elif cls_name == 'total_price':
                        print("Found Total")
                        totalFound = True

                    new_path = outputBaseDir + cropSubDirs[cls_name]
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    if cls_name in numberedClasses:
                        cropName = cls_name + str(cropIndex) + ".jpg"
                    else:
                        cropName = cls_name + ".jpg"
                    outputImagePath = os.path.join(new_path, cropName)
                    if cls_name == "price":
                        # price crops are stored in colour, without blurring
                        cv2.imwrite(outputImagePath, crop_im)
                    else:
                        cv2.imwrite(outputImagePath, cv2.medianBlur(gray, 3))

                # Additional colour copy of every crop in the output root.
                # For un-numbered classes cropIndex is whatever the last
                # numbered class left behind.
                outputImagePath = os.path.join(
                    outputBaseDir, cls_name + str(cropIndex) + ".jpg")
                cv2.imwrite(outputImagePath, crop_im)

        # OCR the numbered crops. Files are numbered 1..count inclusive.
        items = []
        for k in range(1, numItems + 1):
            path_item = outputBaseDir + "/item/item_name" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
            else:
                items.append(text_item)

        print("-------------------------------------------------------------")
        prices = []
        for k in range(1, numPrices + 1):
            path_item = outputBaseDir + "/price/price" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item),
                                                    config="--psm 13")
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
            else:
                prices.append(text_item)
            print(
                "-------------------------------------------------------------"
            )

        rows = []
        for k in range(1, numRows + 1):
            path_item = outputBaseDir + "/row/row" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            if text_item == "":
                print("empty and not relevant")
            else:
                rows.append(text_item)
            print(str(k) + ": " + text_item)

        # Only OCR the total when this image produced one; otherwise the
        # crop file is missing or left over from a previous image.
        total_text = ""
        if totalFound:
            path_item = outputBaseDir + "/total/total_price.jpg"
            total_text = pytesseract.image_to_string(Image.open(path_item))

        with open(resultsFilePath, "a") as resultsFile:
            # write total in result.txt
            resultsFile.write(total_text + "\n")

            # An item is paired with every price that occurs in a row that
            # also contains the item text.
            for item_text in items:
                found = False
                item_ascii = item_text.encode('ascii', 'ignore')
                for row_text in rows:
                    row_ascii = row_text.encode('ascii', 'ignore')
                    if item_ascii not in row_ascii:
                        continue
                    for price_text in prices:
                        if price_text.encode('ascii', 'ignore') in row_ascii:
                            resultsFile.write(item_text + "\n")
                            resultsFile.write(str(price_text) + "\n")
                            found = True

                # Product not found in row
                if not found:
                    resultsFile.write(item_text + "\n")
                    resultsFile.write(" " + "\n")

        # Write the output in ICDAR Format
        outputFile.write(convertToXML(im_name_with_ext, dets_nms))

        if WRITE_DETECTION_RESULTS:
            outputImagePath = os.path.join(detectionResultsPath,
                                           im_name + ".jpg")
            print("Writing image: %s" % (outputImagePath))
            cv2.imwrite(outputImagePath, im)

        if WRITE_ANNOTATION_RESULTS:
            exportToPascalVOCFormat(im_name, im_path, dets_nms,
                                    annotationResultsPath)

    outputFile.close()
    errorStatsFile.close()

    # NOTE(review): the statistics file is created here, but nothing is
    # written to it in this function.
    outputFile = open(
        os.path.join(outputBaseDir,
                     'output-stats-' + EXPERIMENT_NAME + '.txt'), 'w')
    outputFile.close()
Пример #16
0
def main():
    """Run the FCIS demo over every ``*.jpg`` file of a directory.

    Command-line arguments (parsed here with ``argparse``):

    * ``--cfg``: required config file, passed to ``FCISNet``.
    * ``--model``: required model file, passed to ``FCISNet``.
    * ``--img_dir``: required directory that is searched for ``*.jpg``.
    * ``--min_score``: confidence threshold given to ``forward``
      (default 0.85).
    * ``--save``: also write the inference data of each image to a JSON
      file next to the image.
    * ``--novis``: skip rendering and saving the visualisation.
    * ``--wait``: delay passed to ``cv2.waitKey``. When 0 (the default),
      ``show_masks`` is called with ``show=True`` and no OpenCV window is
      opened.

    Images that OpenCV cannot read are reported and skipped.
    """
    import argparse

    parser = argparse.ArgumentParser(description='FCIS demo')
    parser.add_argument('--cfg', dest='cfg_file', help='required config file (YAML file)',
                        required=True, type=str)
    parser.add_argument('--model', dest='model', help='path to trained model (.params file)',
                        required=True, type=str)
    parser.add_argument('--img_dir', dest='img_dir', help='path to directory of images for demo',
                        required=True, type=str)
    parser.add_argument('--min_score', dest='min_score', help='Minimum score. Default 0.85',
                        default=0.85, type=float)
    parser.add_argument('--save', dest='save', help='Saves inference data per image as JSON files (stored in img_dir directory)',
                        action='store_true')
    parser.add_argument('--novis', dest='novis', help='Turn off visualization of inference',
                        action='store_true')
    parser.add_argument('--wait', dest='wait', help='Set the wait time in between frames in opencv waitKey() (default 0 i.e. pause until button pressed)',
                        default=0, type=int)

    args = parser.parse_args()

    # load image demo directory
    img_dir = args.img_dir
    assert osp.exists(img_dir), ("Could not find image directory %s"%(img_dir))

    image_names = nts(glob.glob(osp.join(img_dir, "*.jpg")))

    if len(image_names) == 0:
        print("No files in %s"%(img_dir))
        return

    # load net
    fcis_net = FCISNet(args.cfg_file, args.model)
    CLASSES = fcis_net.classes

    # test: run predictions
    CONF_THRESH = args.min_score
    print("Using min score of %.3f...\n"%(CONF_THRESH))

    # With wait == 0 the display is left to show_masks.
    plt_show = args.wait == 0

    for im_name in image_names:
        im = cv2.imread(im_name, cv2.IMREAD_COLOR)
        if im is None:
            print("Could not read %s"%(im_name))
            continue

        tic()
        dets, masks = fcis_net.forward(im, conf_thresh=CONF_THRESH)
        print('inference time %s: %.4fs'%(im_name, toc()))

        if args.save:
            # im_name comes from the glob above and already contains
            # img_dir, so the JSON path is derived from it directly.
            # Joining it with img_dir again duplicated the directory
            # whenever img_dir was a relative path.
            json_file = osp.splitext(im_name)[0] + ".json"
            reformatted_data = reformat_data(dets, masks, CLASSES)
            write_reformat_data_json(reformatted_data, json_file)

        # vis
        if args.novis:
            continue
        # NOTE(review): the output directory is hard-coded and is not
        # created here; confirm it exists on the target machine.
        im_out_file = "/home/vincent/hd/deep_learning/tmp/FCIS/%s"%(im_name.split("/")[-1])
        im_seg = show_masks(im, dets, masks, CLASSES, config.BINARY_THRESH, show=plt_show)
        im_seg = cv2.resize(im_seg, (960, 540))
        cv2.imwrite(im_out_file, im_seg)
        print("Saved to %s"%(im_out_file))
        if not plt_show:
            cv2.imshow("seg", im_seg)
            cv2.waitKey(args.wait)

    print('\nDONE\n')
Пример #17
0
def main():
    """Run the mask demo on every ``.jpg``/``.png`` file of the test directory.

    The images found in ``cur_path + '/../' + test_dir`` are resized with the
    first entry of ``config.SCALES``, run through the predictor built from
    ``config.symbol`` and the parameters under ``model_dir``, filtered at a
    score of 0.7, rendered with ``show_masks`` and written to
    ``cur_path + '/../' + result_dir`` under their original file names.

    Returns early (after printing a message) when the test directory holds
    no matching image.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    # NOTE(review): eval() of a config-supplied name; acceptable only while
    # the config file is trusted.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = [
        '__background__',  # always index 0
        '1'
    ]

    # load demo data
    test_root = cur_path + '/../' + test_dir
    result_root = cur_path + '/../' + result_dir
    image_names = [
        name for name in os.listdir(test_root)
        if name.endswith(('.jpg', '.png'))
    ]
    if not image_names:
        # the warm-up below needs at least one image
        print('no .jpg/.png images found in %s' % test_root)
        return

    # cv2.imwrite does not create missing directories
    if not os.path.exists(result_root):
        os.makedirs(result_root)

    data = []
    for im_name in image_names:
        im_path = test_root + im_name
        assert os.path.exists(im_path), (
            '{} does not exist'.format(im_path))
        # 128 is the value of cv2.IMREAD_IGNORE_ORIENTATION
        im = cv2.imread(im_path, cv2.IMREAD_COLOR | 128)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, entry)]
                    for entry in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir,
                                        0,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # A missing CLASS_AGNOSTIC setting is treated as "not class agnostic".
    try:
        class_agnostic = bool(config.CLASS_AGNOSTIC)
    except (AttributeError, KeyError):
        class_agnostic = False

    # warm up: two passes over the first image, results discarded
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            batch_data[1].asnumpy()[0, 2] for batch_data in data_batch.data
        ]
        im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        # scale factor stored in im_info; used below when drawing
        scales = [
            batch_data[1].asnumpy()[0, 2] for batch_data in data_batch.data
        ]
        tic()
        # detection itself is run with a scale of 1.0
        scores, boxes, masks, _data_dict = im_detect(predictor, data_batch,
                                                     data_names, [1.0],
                                                     config)
        im_shapes = [
            batch_data[0].shape[2:4] for batch_data in data_batch.data
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100, im_shapes[0][1],
                im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize: keep only detections scoring above 0.7
        for i, cls_dets in enumerate(dets):
            keep = np.where(cls_dets[:, -1] > 0.7)
            dets[i] = cls_dets[keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(test_root + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img (swap the channels back to OpenCV's BGR order first)
        cv2.imwrite(result_root + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print('done')
Пример #18
0
def main():
    """Run the batched Deep Feature Flow R-FCN demo and print timings.

    Frames matching ``demo/sample/*.JPEG`` are grouped into batches of
    ``key_frame_interval`` images. The first frame of every batch is the
    key frame (``data_key``); the remaining frames are stacked into
    ``data_other``. Each batch is passed to ``im_batch_detect`` and the
    running average time per returned result is printed.

    Side effects: pretty-prints ``config``, overwrites ``config.symbol``,
    writes the test symbol to ``dff_rfcn.json`` and creates
    ``demo/rfcn_dff_batch/`` when it does not exist.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    # eval() only ever sees the hard-coded symbol name assigned just above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_batch_test_symbol(config)
    sym.save('dff_rfcn.json')

    # set up class names (only referenced by the disabled visualisation below)
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    # glob returns names in arbitrary order; sort them so that the key-frame
    # grouping below follows the frame sequence.
    image_names = sorted(glob.glob(cur_path + '/../demo/sample/*.JPEG'))
    output_dir = cur_path + '/../demo/rfcn_dff_batch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    last_idx = len(image_names) - 1

    data = []
    key_im_tensor = None
    cur_im_tensor = []
    im_info_tensor = []
    image_names_list = []
    image_names_batch = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name)
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        # The first frame of each interval is the key frame, the rest are
        # "other" frames of the same batch.
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        else:
            cur_im_tensor.append(im_tensor)
        im_info_tensor.append(im_info)
        image_names_batch.append(im_name)

        # Close the batch when the interval is full or the input is exhausted.
        if (idx + 1) % key_frame_interval == 0 or idx == last_idx:
            # NOTE(review): np.concatenate raises ValueError on an empty
            # list, which happens when the final batch holds only a key
            # frame (len(image_names) % key_frame_interval == 1).
            data.append({
                'data_other': np.concatenate(cur_im_tensor),
                'im_info': np.concatenate(im_info_tensor),
                'data_key': key_im_tensor
            })
            key_im_tensor = None
            cur_im_tensor = []
            im_info_tensor = []
            image_names_list.append(image_names_batch)
            image_names_batch = []

    # get predictor
    data_names = ['data_other', 'im_info', 'data_key']
    label_names = []
    data = [[mx.nd.array(batch[name]) for name in data_names]
            for batch in data]
    max_height = max([v[0] for v in config.SCALES])
    max_width = max([v[1] for v in config.SCALES])
    max_data_shape = [[
        ('data_other', (key_frame_interval - 1, 3, max_height, max_width)),
        ('data_key', (1, 3, max_height, max_width)),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, batch)]
                    for batch in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    # Only needed by the disabled visualisation below.
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in range(1):
        data_batch = mx.io.DataBatch(
            data=[data[j]],
            label=[],
            pad=0,
            index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[j])]],
            provide_label=[None])
        # Column 2 of im_info holds the resize scale of each frame.
        scales = [item[1].asnumpy()[:, 2] for item in data_batch.data]
        print(scales[0].shape)
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)

    print("warmup done")

    # test
    total_time = 0  # named so it does not shadow the stdlib ``time`` module
    result_count = 0
    for idx, im_names in enumerate(image_names_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [item[1].asnumpy()[:, 2] for item in data_batch.data]

        tic()
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)
        total_time += toc()
        result_count += len(scores_all)
        print('testing {} {:.4f}s x {:d}'.format(
            im_names[0], total_time / result_count, len(scores_all)))

        # Disabled: per-frame NMS and visualisation of the batch results.
        # for batch_idx in xrange(len(scores_all)):
        #     boxes = boxes_all[batch_idx].astype('f')
        #     scores = scores_all[batch_idx].astype('f')
        #     dets_nms = []
        #     for j in range(1, scores.shape[1]):
        #         cls_scores = scores[:, j, np.newaxis]
        #         cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
        #         cls_dets = np.hstack((cls_boxes, cls_scores))
        #         keep = nms(cls_dets)
        #         cls_dets = cls_dets[keep, :]
        #         cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
        #         dets_nms.append(cls_dets)
        #     # visualize
        #     im = cv2.imread(im_names[batch_idx])
        #     im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        #     # show_boxes(im, dets_nms, classes, 1)
        #     out_im = draw_boxes(im, dets_nms, classes, 1)
        #     _, filename = os.path.split(im_names[batch_idx])
        #     cv2.imwrite(output_dir + filename,out_im)

    print('done')
Пример #19
0
def main():
    """Run the R-FCN (optionally deformable) COCO demo on two sample images.

    The symbol and checkpoint are selected by ``args.rfcn_only``. Each image
    under ``demo/`` is resized, run through ``im_detect``, filtered by
    per-class NMS and a 0.7 score threshold, and displayed with
    ``show_boxes``. The detection time per image is printed.

    Side effects: pretty-prints ``config`` and overwrites ``config.symbol``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    # eval() only ever sees one of the two hard-coded names assigned above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = [
        'COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg'
    ]
    demo_dir = cur_path + '/../demo/'
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        im_path = demo_dir + im_name
        assert os.path.exists(im_path), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names]
            for sample in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)]
                    for sample in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: two untimed passes over the first image
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]],
            label=[],
            pad=0,
            index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        # Entry [0, 2] of im_info is the resize scale stored above.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of the scores is skipped; detections start at class 1.
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            # Keep only confident detections.
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize
        im = cv2.imread(demo_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print('done')
Пример #20
0
def process_video_frame(raw_frame_queue, bbox_frame_queue):
    """Consume raw video frames, run R-FCN detection and emit drawn frames.

    Runs forever. In every round the first two frames taken from
    ``raw_frame_queue`` are only resized and forwarded unchanged; the next
    15 frames are collected, passed to ``im_detect`` as one batch, filtered
    by per-class NMS and a 0.7 score threshold, drawn with
    ``draw_bbox_on_frame`` and put on ``bbox_frame_queue``. The elapsed
    time of each round is printed.

    Args:
        raw_frame_queue: queue-like object whose ``get()`` returns frames;
            ``None`` items are ignored.
        bbox_frame_queue: queue-like object that receives the output frames
            through ``put()``.

    Side effects: pretty-prints ``config`` and overwrites ``config.symbol``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    # eval() only ever sees the hard-coded symbol name assigned just above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # NOTE(review): both sizes read SCALES[0][1]; the other demos in this
    # file use SCALES[0][0] as target size - confirm this is intentional.
    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]

    data_names = ['data', 'im_info']
    label_names = []
    batch_size = 15
    passthrough_frames = 2

    while True:
        tic()
        forwarded = 0
        data = []
        frame_list = []
        while len(data) < batch_size:
            frame = raw_frame_queue.get()
            if frame is None:
                continue
            frame, im_scale = resize(frame,
                                     target_size,
                                     max_size,
                                     stride=config.network.IMAGE_STRIDE)
            # The first frames of each round bypass detection entirely.
            if forwarded < passthrough_frames:
                forwarded += 1
                bbox_frame_queue.put(frame)
                continue
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
            frame_list.append(frame)

        # get predictor (rebuilt for every batch, as before)
        data = [[mx.nd.array(sample[name]) for name in data_names]
                for sample in data]
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)]
                        for sample in data]
        provide_label = [None for _ in data]
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        # Process video frames
        data_batch = mx.io.DataBatch(data=data,
                                     label=[],
                                     pad=0,
                                     provide_data=provide_data,
                                     provide_label=provide_label)
        # Entry [0, 2] of im_info is the resize scale stored above.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, config)

        for idx, frame in enumerate(frame_list):
            # Use this frame's own result when the detector returned one per
            # frame; previously entry 0 was drawn on every frame. Fall back
            # to entry 0 when fewer results than frames came back.
            det_idx = idx if idx < len(scores_all) else 0
            boxes = boxes_all[det_idx].astype('f')
            scores = scores_all[det_idx].astype('f')
            dets_nms = []
            # Column 0 of the scores is skipped; detections start at class 1.
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[:, 4:8]
                else:
                    cls_boxes = boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                # Keep only confident detections.
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            bbox_frame_queue.put(
                draw_bbox_on_frame(frame, dets_nms, classes,
                                   scale=scales[idx]))
        print(toc())
Пример #21
0
def predict_on_image_names(
        image_names,
        config,
        model_path_id="/home/data/output/resnet_v1_101_coco_fcis_end2end_ohem-nebraska/train-nebraska/e2e",
        epoch=8):
    import argparse
    import os
    import sys
    import logging
    import pprint
    import cv2
    from utils.image import resize, transform
    import numpy as np
    # get config
    os.environ['PYTHONUNBUFFERED'] = '1'
    os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
    cur_path = os.path.abspath(".")
    sys.path.insert(
        0, os.path.join(cur_path, '../external/mxnet', config.MXNET_VERSION))
    import mxnet as mx
    print("use mxnet at", mx.__file__)
    from core.tester import im_detect, Predictor
    from symbols import *
    from utils.load_model import load_param
    from utils.show_masks import show_masks
    from utils.tictoc import tic, toc
    from nms.nms import py_nms_wrapper
    from mask.mask_transform import gpu_mask_voting, cpu_mask_voting
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = ['cp']

    # load demo data
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # loading the last epoch that was trained, 8
    arg_params, aux_params = load_param(model_path_id, epoch, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    all_classes = []
    all_configs = []
    all_masks = []
    all_dets = []
    all_ims = []

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        all_classes.append(classes)
        all_configs.append(config)
        all_masks.append(masks)
        all_dets.append(dets)
        im = cv2.imread(im_name)
        all_ims.append(im)
    return all_ims, all_dets, all_masks, all_configs, all_classes
Пример #22
0
def main():
    """Run the Deep Feature Flow R-FCN demo on a sample video sequence.

    Frames from ``demo/ILSVRC2015_val_00007010/*.JPEG`` are processed in
    sorted order. Every ``key_frame_interval``-th frame goes through the key
    predictor, which also returns a feature tensor; the frames in between
    go through the current-frame predictor, fed with the most recent key
    features. Detections are filtered by per-class NMS and a 0.7 score
    threshold, drawn with ``draw_boxes`` and written to ``demo/rfcn_dff/``.

    Side effects: pretty-prints ``config``, overwrites ``config.symbol``,
    creates the output directory, writes one image per frame and shows each
    result in a window when ``enable_cv2_imshow`` is set.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    # eval() only ever sees the hard-coded symbol name assigned just above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/rfcn_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        # Remember the latest key frame; later frames reference it.
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': key_im_tensor,
            # Placeholder; replaced by the real key features before a
            # non-key frame is run.
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'im_info', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names]
            for sample in data]
    max_height = max([v[0] for v in config.SCALES])
    max_width = max([v[1] for v in config.SCALES])
    max_data_shape = [[
        ('data', (1, 3, max_height, max_width)),
        ('data_key', (1, 3, max_height, max_width)),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)]
                    for sample in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: one key frame and one non-key frame. The range is bounded by
    # the number of frames so that a single-frame sequence no longer raises
    # IndexError on data[1].
    for j in range(min(2, len(data))):
        data_batch = mx.io.DataBatch(
            data=[data[j]],
            label=[],
            pad=0,
            index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[j])]],
            provide_label=[None])
        # Entry [0, 2] of im_info is the resize scale stored above.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
        if j % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(key_predictor,
                                                       data_batch, data_names,
                                                       scales, config)
        else:
            # Swap the placeholder for the features of the last key frame.
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch,
                                                    data_names, scales, config)

    print("warmup done")

    # test
    total_time = 0  # named so it does not shadow the stdlib ``time`` module
    frame_count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]

        tic()
        mark = ''
        if idx % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(key_predictor,
                                                       data_batch, data_names,
                                                       scales, config)
            mark = '+'  # flags key frames in the log line
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch,
                                                    data_names, scales, config)
        time_elapsed = toc()
        total_time += time_elapsed
        frame_count += 1
        print('testing {} {:.4f}s {:.4f}s {}'.format(
            im_name, total_time / frame_count, time_elapsed, mark))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of the scores is skipped; detections start at class 1.
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            # Keep only confident detections.
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
        if enable_cv2_imshow:
            cv2.imshow('out_im', out_im)
            cv2.waitKey(1)

    print('done')
def process_one_batch_images_fun(isUrlFlag=False,
                                 one_batch_images_list=None,
                                 init_model_param=None,
                                 fileOp=None,
                                 vis=None):
    """Detect objects in one batch of images and pass each result to show_boxes.

    Images that cannot be read, or that are not 3-channel, are reported on
    stdout and skipped.

    Args:
        isUrlFlag: Forwarded unchanged to readImage_fun and show_boxes.
        one_batch_images_list: Image identifiers to process. None or an empty
            sequence makes the call a no-op.
        init_model_param: Indexable with three items; item 0 is given to
            Predictor as the symbol, items 1 and 2 as arg_params/aux_params.
        fileOp: Forwarded unchanged to show_boxes.
        vis: Forwarded unchanged to show_boxes.

    Returns:
        None.

    Note:
        Reads the module-level names RFCN_DCN_CONFIG, config, args and
        min_threshold.
    """
    # Checked before anything else so the default (None) is a clean no-op
    # instead of a TypeError from len(None).
    if one_batch_images_list is None or len(one_batch_images_list) <= 0:
        return

    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]

    all_can_read_image = []
    data = []
    for im_name in one_batch_images_list:
        im = readImage_fun(isUrlFlag=isUrlFlag, imagePath=im_name)
        im_shape = np.shape(im)
        # A shapeless result means the image could not be read.
        if im_shape == ():
            print("ReadImageError : %s" % (im_name))
            continue
        # Test the rank first: a 2-D (single-channel) image has no shape[2].
        if len(im_shape) != 3 or im_shape[2] != 3:
            print("%s channel is not 3" % (im_name))
            continue
        all_can_read_image.append(im_name)
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    if not data:
        # Every image was skipped: there is nothing to feed a predictor.
        print('process one batch images done')
        return

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None] * len(data)

    predictor = Predictor(init_model_param[0],
                          data_names,
                          label_names,
                          context=[mx.gpu(int(args.gpuId))],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=init_model_param[1],
                          aux_params=init_model_param[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        # im_info is the second entry of each sample; its [0, 2] cell holds
        # the resize scale stored above.
        scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of scores is skipped (background).
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(isUrlFlag=isUrlFlag,
                   im_name=im_name,
                   dets=dets_nms,
                   classes=classes,
                   scale=1,
                   vis=vis,
                   fileOp=fileOp,
                   flag=args.outputFileFlag)
    print('process one batch images done')
Пример #24
0
def main():
    """Run the FCIS demo on four bundled COCO test images and print detections.

    Loads the 'fcis_coco' parameters, builds a CPU predictor, runs im_detect on
    each image, and prints every detection scoring above 0.7 as
    '<class>: <score> (<box>)'.

    Relies on module-level names: config, cur_path and the detection helpers
    (resize, transform, load_param, Predictor, im_detect, ...).
    """
    # get symbol
    pprint.pprint(config)
    # NOTE(review): eval() of a config-supplied name -- the config file must
    # be trusted.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = [
        'COCO_test2015_000000000275.jpg', 'COCO_test2015_000000001412.jpg',
        'COCO_test2015_000000073428.jpg', 'COCO_test2015_000000393281.jpg'
    ]
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        im_path = cur_path + '/../demo/' + im_name
        # '{}' (not '%s') so that str.format really inserts the path.
        assert os.path.exists(im_path), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None] * len(data)
    # The epoch (0) has to come before the keyword arguments; the previous
    # ordering (convert=True, 0, ...) was a SyntaxError.
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco',
                                        0,
                                        convert=True,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.cpu()],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        # im_info is the second entry of each sample; [0, 2] is the scale.
        scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [sample[0].shape[2:4] for sample in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                # Any failure on the class-agnostic path (including a config
                # that has no CLASS_AGNOSTIC entry) falls back to the
                # per-class box columns.
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except Exception:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = cpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH)

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]

        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize: keep only confident detections (score is the last column)
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        for i, cls_dets in enumerate(dets):
            for det in cls_dets:
                print('{name}: {score} ({loc})'.format(
                    name=classes[i],
                    score=det[-1],
                    loc=det[:-1].tolist()))

    print('done')
Пример #25
0
def Pcs_each_vdo(vid_list, sv_dir, GPU):
    """Run detection on every frame of each video directory and save the result.

    For each directory in vid_list the frames are loaded, pushed through
    im_detect + nms, and the per-frame detections are handed to
    save_each_video_det. A video is skipped when '<sv_dir>/<name>.json'
    already exists and has as many keys as the directory has entries.

    Args:
        vid_list: Iterable of frame directories (each is converted with str()).
        sv_dir: Directory checked for existing '<video name>.json' files and
            passed to save_each_video_det as save_dir.
        GPU: Device id passed to mx.gpu().

    Returns:
        None.

    Note:
        Frame paths are rebuilt as '<prefix>%06d.jpg' for 1..N, where N is the
        number of directory entries and <prefix> is the first entry minus its
        last 10 characters, so every entry is assumed to be such a frame.
        Relies on module-level names: target_size, max_size, config,
        data_names, label_names, max_data_shape, sym, arg_params, aux_params
        and nms.
    """
    # process each_video
    for order, ec_vid in enumerate(vid_list):
        ec_vid = str(ec_vid)
        tic()
        fm_lis = glob.glob(ec_vid + '/*')
        fm_num = len(fm_lis)
        vd_name = ec_vid.split('/')[-1]
        sv_ph = sv_dir + '/' + vd_name

        # An existing json whose key count equals the frame count means this
        # video has already been processed.
        json_fl = sv_ph + '.json'
        if os.path.exists(json_fl):
            with open(json_fl, 'r') as load_f:
                load_file = json.load(load_f)
            if len(load_file.keys()) == fm_num:
                print('the file name   {}   {}   Exist OK  Passed'.format(
                    order + 1, vd_name))
                continue
        print('The next vid name:   {}   Frm num:  {}'.format(vd_name, fm_num))

        if fm_num == 0:
            # Without this guard fm_lis[0] raises IndexError and one empty
            # directory aborts the whole run.
            print('The vid  {}  has no frames, skipped'.format(vd_name))
            continue

        jpg_bs_name = fm_lis[0][:-10]
        frm_list = [
            jpg_bs_name + '%06d.jpg' % No for No in range(1, fm_num + 1)
        ]

        # im_info is computed once from the first frame and reused for every
        # frame of the video.
        # NOTE(review): 1 | 128 is presumably cv2.IMREAD_COLOR |
        # cv2.IMREAD_IGNORE_ORIENTATION -- confirm against the cv2 version.
        ft_im = cv2.imread(frm_list[0], 1 | 128)
        ft_im, im_scale = resize(ft_im,
                                 target_size,
                                 max_size,
                                 stride=config.network.IMAGE_STRIDE)
        im_tenssp = transform(ft_im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tenssp.shape[2], im_tenssp.shape[3], im_scale]],
            dtype=np.float32)

        # ====================  get each video frames and generate data ====================
        data = []
        for fm_dir in frm_list:
            fm = cv2.imread(fm_dir, 1 | 128)
            fm, _ = resize(fm,
                           target_size,
                           max_size,
                           stride=config.network.IMAGE_STRIDE)
            fm_sptensor = transform(fm, config.network.PIXEL_MEANS)
            data.append({'data': fm_sptensor, 'im_info': im_info})

        # Drop the loop temporaries before the arrays are copied to mxnet.
        # (The former "del locals()[x]" loop never touched the real locals.)
        del fm_sptensor, fm
        gc.collect()

        # process
        data = [[mx.nd.array(item[name]) for name in data_names]
                for item in data]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                        for item in data]
        provide_label = [None] * len(data)

        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(GPU)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)

        # ====================  process each to get bbox  ====================
        each_video_det = {}
        for idx, im_name in enumerate(frm_list):
            data_batch = mx.io.DataBatch(
                data=[data[idx]],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, data[idx])]],
                provide_label=[None])
            scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[:, 4:8]
                else:
                    cls_boxes = boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                if len(cls_dets) > 0:
                    # Append the class index j as a sixth column.
                    dets_nms.append(
                        np.insert(cls_dets, 5, values=j, axis=1).tolist())

            # Key: the six digits that precede '.jpg' in the frame path.
            each_video_det[im_name.split('/')[-1][-10:-4]] = dets_nms

        save_each_video_det(video_det=each_video_det,
                            save_dir=sv_dir,
                            video_name=vd_name)
        print('{} / {} time {:.4f}s {}'.format(order + 1, len(vid_list),
                                               toc(), vd_name))
Пример #26
0
def inference_rcnn_AICity(cfg, dataset, image_set, root_path, dataset_path,
              ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Run detection over a test set, show the boxes and log them to text files.

    For every batch yielded by the TestLoader the detections are filtered with
    nms and a fixed 0.2 score cut-off, displayed with show_boxes, and appended
    to 'data/output/<timestamp>.txt' as
    '<class> <x1> <y1> <x2> <y2> <score>' lines followed by a frame marker.

    Args:
        cfg: Configuration object (symbol, SCALES, TEST.*, CLASS_AGNOSTIC).
        dataset: Name that is eval()'d to obtain the imdb class.
        image_set, root_path, dataset_path: Passed to the imdb constructor.
        ctx: List of devices; its length is the loader batch size.
        prefix, epoch: Passed to load_param to locate the model parameters.
        vis, ignore_cache, thresh: Accepted for interface compatibility; not
            used by this function.
        shuffle: Passed to TestLoader.
        has_rpn: Selects get_symbol (True) or get_symbol_rfcn plus an external
            '<proposal>_roidb' (False).
        proposal: Name prefix of the imdb roidb method used when not has_rpn.
        logger: Required; receives the formatted configuration.
        output_path: Passed to the imdb constructor as result_path.

    Raises:
        AssertionError: If no logger is supplied.
    """
    import datetime
    import time

    if not logger:
        # Raised explicitly so the check survives "python -O".
        raise AssertionError('require a logger')

    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and testing data
    # NOTE(review): eval() of cfg.symbol / dataset -- both must come from a
    # trusted configuration.
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    if has_rpn:
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym = sym_instance.get_symbol_rfcn(cfg, is_train=False)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    gt_roidb = imdb.gt_roidb_Shuo()
    if has_rpn:
        roidb = gt_roidb
    else:
        roidb = getattr(imdb, proposal + '_roidb')(gt_roidb)

    print('len(roidb): {}'.format(len(roidb)))
    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    print('inferring:  {}  epoch:  {}'.format(prefix, epoch))

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    nms = gpu_nms_wrapper(cfg.TEST.NMS, 0)

    # start detection
    print('test_data.size {}'.format(test_data.size))
    print('test_data: {}'.format(test_data))
    print('data_names: {}'.format(data_names))
    print('test_data.provide_data: {}'.format(test_data.provide_data))
    print('test_data.provide_label: {}'.format(test_data.provide_label))

    classes = ['Car','SUV','SmallTruck','MediumTruck','LargeTruck','Pedestrian','Bus','Van','GroupOfPeople','Bicycle', 'Motorcycle']
    # NOTE(review): the thresh argument is ignored; the cut-off is fixed here.
    threshold = 0.2  # confidence threshold between 0 and 1
    output_dir = os.path.join('data', 'output')

    for nnn, (im_info, data_batch) in enumerate(test_data):
        print(nnn)
        image_name = roidb[nnn]['image']
        tic()
        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, _ = im_detect(predictor, data_batch, data_names, scales, cfg)
        # Only the first image of the batch is post-processed.
        boxes = boxes_all[0].astype('f')
        scores = scores_all[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(image_name, toc()))

        # visualize
        im = cv2.imread(image_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # The file name has one-second resolution, so frames processed within
        # the same second are appended to the same file.
        stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
        output_file = os.path.join(output_dir, stamp + '.txt')

        # "with" closes the handle every frame; it used to be leaked.
        with open(output_file, 'a') as thefile:
            # NOTE(review): assumes the model yields at least len(classes)
            # foreground classes, otherwise dets_nms[cls_idx] raises.
            for cls_idx, cls_name in enumerate(classes):
                for x_small, y_small, x_large, y_large, prob in dets_nms[cls_idx]:
                    # x2/y2 are forced to exceed x1/y1 by at least 0.01.
                    fields = [cls_name, str(x_small), str(y_small),
                              str(max(x_small+0.01, x_large)),
                              str(max(y_small+0.01, y_large)),
                              str(prob)]
                    thefile.write(' '.join(fields) + '\n')
            thefile.write("----next frame----")
Пример #27
0
def main():
    """Run the R-FCN video demo on the frames of one ILSVRC2015 validation clip.

    Every '*.JPEG' frame under demo/ILSVRC2015_val_00007010 is preprocessed, the
    predictor is warmed up twice on the first frame, and then each frame is
    detected, drawn with draw_boxes, shown in a window (blocking until a key is
    pressed) and written to demo/rfcn/.

    Relies on module-level names: config, cur_path and the detection helpers
    (resize, transform, load_param, Predictor, im_detect, draw_boxes, ...).
    """
    import datetime

    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    # NOTE(review): eval() is only applied to the constant assigned above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        # '{}' (not '%s') so that str.format really inserts the path.
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name)
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: two throw-away passes over the first frame
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    total_time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]

        tic()
        time_old = datetime.datetime.now()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        total_time += toc()
        count += 1
        # Running average over the frames processed so far.
        print('testing {} {:.4f}s'.format(im_name, total_time / count))
        # timedelta.microseconds is only the sub-second component, so build
        # the whole duration in microseconds from all three fields.
        elapsed = datetime.datetime.now() - time_old
        elapsed_us = ((elapsed.days * 86400 + elapsed.seconds) * 1000000 +
                      elapsed.microseconds)
        print('requests', elapsed_us)

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        cv2.imshow("asdf", out_im)
        # waitKey(0) blocks until a key is pressed before moving on.
        cv2.waitKey(0)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
Пример #28
0
def main():
    """Run the R-FCN COCO demo (deformable or plain) on two bundled images.

    args.rfcn_only selects between the 'resnet_v1_101_rfcn_dcn' symbol with the
    'rfcn_dcn_coco' parameters and the plain 'resnet_v1_101_rfcn' symbol with
    'rfcn_coco'. After two warm-up passes each image is detected on GPU 0,
    filtered with nms and a 0.7 score cut-off, and shown with show_boxes.

    Relies on module-level names: config, args, cur_path and the detection
    helpers (resize, transform, load_param, Predictor, im_detect, ...).
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    # NOTE(review): eval() is only applied to one of the two constants above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        im_path = cur_path + '/../demo/' + im_name
        # '{}' (not '%s') so that str.format really inserts the path.
        assert os.path.exists(im_path), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)] for item in data]
    provide_label = [None] * len(data)
    model_name = 'rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'
    arg_params, aux_params = load_param(cur_path + '/../model/' + model_name, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: two throw-away passes over the first image
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [sample[1].asnumpy()[0, 2] for sample in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of scores is skipped (background).
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print('done')
Пример #29
0
def main():
    """Run the DeepLab segmentation demo on the bundled sample images.

    Builds the symbol selected by ``args.deeplab_only``, loads the matching
    parameters from ``../model``, runs the predictor on GPU 0 for every demo
    image, writes the palettised result to ``../demo/seg_<name>.png`` and
    shows the raw image and the saved segmentation in OpenCV windows
    (blocking on a key press after each image).

    Relies on names defined outside this function: ``config``, ``args``,
    ``cur_path``, ``Predictor``, ``load_param``, ``resize``, ``transform``,
    ``getpallete`` and ``tic``/``toc``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    # NOTE: eval is only ever applied to one of the two hard-coded symbol
    # names assigned just above, never to external input.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    demo_dir = cur_path + '/../demo/'

    # load demo data
    image_names = ['frankfurt_000001_073088_leftImg8bit.png', 'lindau_000024_000019_leftImg8bit.png']
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    raw_data = []
    for im_name in image_names:
        # '{}' (not '%s') is required for str.format to insert the file name.
        assert os.path.exists(demo_dir + im_name), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(demo_dir + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        raw_data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data']
    label_names = ['softmax_label']
    # Only the entries listed in data_names are handed to the network.
    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in raw_data]
    max_data_shape = [[('data', (1, 3,
                                 max(v[0] for v in config.SCALES),
                                 max(v[1] for v in config.SCALES)))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)] for sample in data]
    provide_label = [None] * len(data)
    model_name = 'deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'
    arg_params, aux_params = load_param(cur_path + '/../model/' + model_name, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    def make_batch(index):
        # Wrap the pre-loaded sample at `index` in a single-image DataBatch.
        sample = data[index]
        return mx.io.DataBatch(data=[sample], label=[], pad=0, index=index,
                               provide_data=[[(k, v.shape) for k, v in zip(data_names, sample)]],
                               provide_label=[None])

    def segment(batch):
        # Forward pass followed by an argmax over axis 1 of the softmax output.
        outputs = predictor.predict(batch)
        return [mx.ndarray.argmax(out['softmax_output'], axis=1).asnumpy() for out in outputs]

    # warm up: two untimed passes on the first image; results are discarded
    for _ in range(2):
        segment(make_batch(0))

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = make_batch(idx)

        tic()
        output_all = segment(data_batch)
        pallete = getpallete(256)

        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        # Single-argument parenthesised print behaves the same under Python 2.
        print('testing {} {:.4f}s'.format(im_name, toc()))
        pure_im_name = os.path.splitext(im_name)[0]
        seg_path = demo_dir + 'seg_' + pure_im_name + '.png'
        segmentation_result.save(seg_path)
        # visualize
        im_raw = cv2.imread(demo_dir + im_name)
        seg_res = cv2.imread(seg_path)
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print('done')