def read_files(dtype):
    """Load every feature CSV of one split and join them column-wise.

    Files are taken from ``../out`` in sorted order; only names that start
    with ``features-`` and end with ``-<dtype>.csv`` are considered.  Each
    bare file name is handed to ``read_file``.

    :param dtype: split suffix used in the file names (e.g. the ``train`` in
        ``features-foo-train.csv``).
    :return: ``(names, X)`` where ``names`` is the tuple of the first items
        returned by ``read_file`` and ``X`` is the second items concatenated
        along axis 1.
    """
    tic()
    suffix = '-%s.csv' % dtype
    selected = []
    for entry in sorted(os.listdir('../out')):
        if entry.startswith('features-') and entry.endswith(suffix):
            selected.append(entry)
    # presumably read_file returns a (names, matrix) pair -- verify against
    # its definition
    loaded = [read_file(entry) for entry in selected]
    names, X = zip(*loaded)
    toc('read %s data' % dtype)
    return names, np.concatenate(X, 1)
def extract(info_filename, pairs_filename, mode):
    """Bring the feature CSVs of one split up to date.

    Loads the item-info table and the item-pairs table from ``../data``,
    maps every item ID in the pairs onto its row number in the info file,
    and then asks each ``features/extract-*.py`` module (through
    ``sync_extract``) to (re)generate ``../out/features-<name>-<mode>.csv``.
    Finally, feature CSVs of this split that no longer have a matching
    module are deleted.

    :param info_filename: info CSV file name, relative to ``../data``.
    :param pairs_filename: pairs CSV file name, relative to ``../data``.
    :param mode: split name; ``'train'`` reads pair columns (0, 1), any
        other value reads columns (1, 2).  Also used as the CSV suffix.
    """
    info_filename = os.path.join('../data', info_filename)
    pairs_filename = os.path.join('../data', pairs_filename)

    tic()
    info_df = pd.read_csv(
        info_filename,
        dtype={'itemID': int, 'categoryID': int, 'price': float},
        usecols=(0, 1, 6, 7, 8, 9, 10), index_col=0)
    info_df['line'] = np.arange(len(info_df), dtype=int)
    toc('info file')
    info_reader = MyCSVReader(info_filename)
    toc('info reader')

    cols = (0, 1) if mode == 'train' else (1, 2)
    pairs = np.genfromtxt(pairs_filename, int, delimiter=',',
                          skip_header=1, usecols=cols)
    toc('pairs file')

    # Turn item IDs into row numbers of the CSV file / info matrix.  The
    # frame is indexed by its first column, so this is a label lookup.
    a = info_df.ix[pairs[:, 0]]['line']
    b = info_df.ix[pairs[:, 1]]['line']
    pairs_lines = np.c_[a, b]
    toc('pairs lines')

    params = (info_filename, info_reader, info_df, pairs_lines)

    # Only real source files count as feature modules.  Without the '.py'
    # filter a compiled 'extract-foo.pyc' (or any stray file) would be
    # truncated to a bogus module name and break the import / mtime check.
    modules = [os.path.splitext(entry)[0]
               for entry in sorted(os.listdir('features'))
               if entry.startswith('extract-') and entry.endswith('.py')]
    # module[8:] drops the 'extract-' prefix.
    csvs = ['../out/features-%s-%s.csv' % (module[8:], mode)
            for module in modules]

    # Create features from modules that are new or have changed; extraction
    # runs sequentially, one module after the other.
    for module, csv in zip(modules, csvs):
        sync_extract(module, csv, params)

    # Remove whatever was left behind by modules that no longer exist.
    expected = set(csvs)
    vestiges = [os.path.join('../out', f) for f in os.listdir('../out')
                if f.startswith('features-') and f.endswith('-%s.csv' % mode)]
    for v in vestiges:
        if v not in expected:
            print('removing old %s...' % v)
            os.remove(v)
def sync_extract(module, csv, params):
    """Regenerate one feature CSV if it is missing or older than its module.

    :param module: module name inside the ``features`` package; its source
        is expected at ``features/<module>.py``.
    :param csv: path of the CSV file to (re)write.
    :param params: positional arguments forwarded to the module's ``fn``.
    """
    if os.path.exists(csv):
        csv_mtime = os.path.getmtime(csv)
        source_mtime = os.path.getmtime('features/' + module + '.py')
        if source_mtime <= csv_mtime:
            # CSV is at least as recent as the module source: nothing to do.
            return

    tic()
    extractor = importlib.import_module('features.' + module)
    X, names = extractor.fn(*params)
    toc(module[8:])

    if len(X):
        # 1-D feature vectors are turned into single-column matrices so
        # that everything can be joined side by side.
        if len(X[0].shape) == 1:
            X = [column[:, np.newaxis] for column in X]
        X = np.concatenate(X, 1)
    assert X.shape[1] == len(names)

    header = ','.join(names)
    if X.dtype == int:
        fmt = '%d'
    else:
        fmt = '%.6f'
    np.savetxt(csv, X, fmt, delimiter=',', header=header, comments='')
def main():
    """Run the detector over every ``.mp4`` found in ``../../tmp``.

    For each video the first frame is used to size the predictor, then every
    frame is pushed through ``im_detect``, filtered per class with NMS and a
    0.65 score threshold, and drawn with ``show_boxes``.  Two files are
    written to ``../../tmp/output`` per video: ``<name>_out.mp4`` with the
    rendered frames and ``<name>_detect.pkl`` holding one
    ``[frame_idx] + out`` row per item returned by ``show_boxes``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn" if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    # config.symbol is one of the two literals above, so eval() never sees
    # external input here.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # find all videos
    video_path = "../../tmp"
    video_files = sorted([x for x in os.listdir(video_path) if x.endswith(".mp4")])
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    data_names = ['data', 'im_info']
    label_names = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]

    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        try:
            # Only the first frame is needed to describe the input shapes
            # to the predictor.
            data = []
            for im in vid:
                im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
                data.append({'data': im_tensor, 'im_info': im_info})
                break

            # get predictor
            data = [[mx.nd.array(sample[name]) for name in data_names] for sample in data]
            max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                         max([v[1] for v in config.SCALES])))]]
            provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)] for sample in data]
            provide_label = [None for _ in data]
            print("hhhhh")
            print(provide_data, provide_label)
            print("hhhhh")
            arg_params, aux_params = load_param(
                cur_path + '/../model/demo_model/' + ('fpn_dcn_coco' if not args.rfcn_only else 'fpn_coco'),
                0, process=True)
            predictor = Predictor(sym, data_names, label_names,
                                  context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                  provide_data=provide_data, provide_label=provide_label,
                                  arg_params=arg_params, aux_params=aux_params)
            nms = gpu_nms_wrapper(config.TEST.NMS, 0)
            print("successfully load model")

            vout = []
            # write to video
            writer = skvideo.io.FFmpegWriter(
                os.path.join(save_path, vf.replace(".mp4", "_out.mp4")),
                outputdict={'-vcodec': 'libx264', '-b': '300000000'})
            try:
                for frame_idx, im in enumerate(vid):
                    # Keep the untouched frame for drawing; `im` is resized below.
                    im_original = im.copy()
                    im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
                    im_tensor = transform(im, config.network.PIXEL_MEANS)
                    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
                    frame_data = [mx.nd.array(arr) for arr in (im_tensor, im_info)]
                    # index is the current frame number (the original passed a
                    # leftover loop variable from the first-frame peek).
                    data_batch = mx.io.DataBatch(
                        data=[frame_data], label=[], pad=0, index=frame_idx,
                        provide_data=[[(k, v.shape) for k, v in zip(data_names, frame_data)]],
                        provide_label=[None])
                    scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]

                    tic()
                    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
                    boxes = boxes[0].astype('f')
                    scores = scores[0].astype('f')
                    dets_nms = []
                    num_dets = 0
                    # Column 0 of the scores is skipped; the remaining columns
                    # are handled one class at a time.
                    for j in range(1, scores.shape[1]):
                        cls_scores = scores[:, j, np.newaxis]
                        cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                        cls_dets = np.hstack((cls_boxes, cls_scores))
                        keep = nms(cls_dets)
                        cls_dets = cls_dets[keep, :]
                        cls_dets = cls_dets[cls_dets[:, -1] > 0.65, :]
                        dets_nms.append(cls_dets)
                        num_dets += cls_dets.shape[0]
                    print('testing {} the {} th frame at {:.4f}s, detections {}'.format(
                        vf, frame_idx, toc(), num_dets))

                    # save results
                    save_im, outputs = show_boxes(im_original, dets_nms, classes, 1, False)
                    writer.writeFrame(save_im)
                    for out in outputs:
                        vout.append([frame_idx] + out)
            finally:
                # Finalise the output file even if a frame fails.
                writer.close()

            # save the whole video detection into pickle file
            with open(os.path.join(save_path, vf.replace(".mp4", "_detect.pkl")), "wb") as f:
                pickle.dump(vout, f, protocol=2)
        finally:
            # Release the reader before moving on to the next video.
            vid.close()
        pbar.update(1)
    pbar.close()
    print('done')
# -*- coding: utf-8 -*- import sys sys.dont_write_bytecode = True import os from utils.tictoc import tic, toc import pickle import numpy as np import pandas as pd from scipy import stats print 'load items info...' tic() Xinfo = pd.read_csv('../../data/ItemInfo_train.csv', index_col=0, usecols=[0, 1, 6, 7, 8, 9, 10]) toc() print 'load items pairs...' tic() Xpair = pd.read_csv('../../data/ItemPairs_train.csv', usecols=[0, 1, 2]) toc() # idxmap is an efficient mapping between item-id and row index # we could also use Xinfo.ix[indices], but this approach seems # slightly faster tic() print 'load items mapping...' if os.path.exists('idxmap.pickle'): with open('idxmap.pickle', 'rb') as f: idxmap = pickle.load(f) else: lastid = Xinfo.index[-1]
def main():
    """Run the vehicle detector over every ``.mp4`` found in ``../../tmp``.

    One sample image from ``../../aic2018/track1/images/`` is used to size
    the predictor.  Every video frame is then pushed through ``im_detect``,
    filtered per class with NMS and a 0.7 score threshold, and passed to
    ``show_boxes``.  For each video a pickle ``<name>.pkl`` with one
    ``[frame_idx] + out`` row per item returned by ``show_boxes`` is written
    to ``../../tmp/output``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    # config.symbol is the literal above, so eval() never sees external input.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 5
    classes = ["car", "bus", "van", "others"]

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]

    # load one demo image; it only serves to describe the input shapes
    im_path = '../../aic2018/track1/images/'
    image_names = [
        x for x in os.listdir(im_path)
        if x.endswith(".jpg") and x.startswith("9_1") and not x.endswith("_bbox.jpg")
    ]
    data = []
    for im_name in image_names[:1]:
        assert os.path.exists(im_path + im_name), (
            '%s does not exist' % (im_path + im_name))
        im = cv2.imread(im_path + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})
    print(data)

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)] for sample in data]
    provide_label = [None for _ in data]

    # load parameters
    arg_params, aux_params = load_param(cur_path + '/../model/' + 'fpn_detrac', 1, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    print("successfully load model")

    # find all videos
    video_path = "../../tmp"
    video_files = [x for x in os.listdir(video_path) if x.endswith(".mp4")]
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        vout = []
        try:
            for frame_idx, im in enumerate(vid):
                im, im_scale = resize(im, target_size, max_size,
                                      stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array(
                    [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                    dtype=np.float32)
                frame_data = [mx.nd.array(arr) for arr in (im_tensor, im_info)]
                # index is the current frame number (the original passed a
                # leftover variable from the sample-image loop).
                data_batch = mx.io.DataBatch(
                    data=[frame_data], label=[], pad=0, index=frame_idx,
                    provide_data=[[(k, v.shape) for k, v in zip(data_names, frame_data)]],
                    provide_label=[None])
                scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]

                tic()
                scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
                boxes = boxes[0].astype('f')
                scores = scores[0].astype('f')
                dets_nms = []
                num_dets = 0
                # Column 0 of the scores is skipped; the remaining columns are
                # handled one class at a time.
                for j in range(1, scores.shape[1]):
                    cls_scores = scores[:, j, np.newaxis]
                    cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    keep = nms(cls_dets)
                    cls_dets = cls_dets[keep, :]
                    cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                    dets_nms.append(cls_dets)
                    num_dets += cls_dets.shape[0]
                # Report kept detections, not the (constant) number of classes.
                print('testing {} the {} th frame at {:.4f}s, detections {}'.format(
                    vf, frame_idx, toc(), num_dets))

                # save results
                # NOTE(review): `im` is the resized frame here -- confirm that
                # show_boxes is meant to receive it rather than the original.
                save_im, outputs = show_boxes(im, dets_nms, classes, 1)
                for out in outputs:
                    vout.append([frame_idx] + out)
        finally:
            # Release the reader before moving on to the next video.
            vid.close()

        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", ".pkl")), "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)
    pbar.close()
    print('done')
def main():
    """Segment people in a directory of images and dump the results as JSON.

    Command line: ``indir outfile [-d DEVICE]``.  All ``.jpg``/``.png``/
    ``.jpeg`` files of ``indir`` are loaded into memory, run through
    ``im_detect``, and for every 'person' detection above the 0.7 threshold
    a record with image path, label, RLE-encoded mask, bounding box and
    score is appended.  The list of records is written to ``outfile``.

    :raises ValueError: if ``indir`` contains no supported image.
    :raises IOError: if an image cannot be decoded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("indir", type=lambda s: unicode(s, 'utf8'),
                        help="Directory containing list of images")
    parser.add_argument("outfile", type=lambda s: unicode(s, 'utf8'),
                        help="Path to write predictions")
    parser.add_argument("-d", "--device", type=int, default=0,
                        help="Device ID to use")
    args = parser.parse_args()
    params = vars(args)

    # ---------------------------------------------------------- Read config
    # Apply --device BEFORE deriving ctx_id; the original derived ctx_id
    # first, so the flag never reached mx.gpu() / gpu_mask_voting().
    config['gpus'] = str(params['device'])
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    # NOTE(review): eval() on a configuration value -- only safe while the
    # configuration is trusted.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # ---------------------------------------------------------- Load Images
    image_path_list = []
    data = []
    scale_factor = 1.0
    img_dir = osp.abspath(params['indir'])
    det_thresh = 0.7

    # Load abs paths of images
    for f in sorted(os.listdir(img_dir)):
        _, f_ext = osp.splitext(f)
        if f_ext in ['.jpg', '.png', '.jpeg']:
            image_path_list.append(osp.join(img_dir, f))
    if not image_path_list:
        raise ValueError('no .jpg/.png/.jpeg images found in %s' % img_dir)

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    print('Loading {} images into memory...'.format(len(image_path_list)))
    for image_path in image_path_list:
        im = cv2.imread(image_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if im is None:
            # cv2.imread signals failure by returning None.
            raise IOError('could not read image %s' % image_path)
        height, width = im.shape[:2]
        im = cv2.resize(
            im, (int(scale_factor * width), int(scale_factor * height)))
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})
    print('Loaded {} images'.format(len(image_path_list)))

    # ---------------------------------------------------------- Predict
    predictions = []

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)] for sample in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(
        '/BS/orekondy2/work/opt/FCIS/model/fcis_coco', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config)

    # A missing CLASS_AGNOSTIC setting is treated as False (the original
    # reached the same result through a bare except).
    class_agnostic = getattr(config, 'CLASS_AGNOSTIC', False)
    nms = py_nms_wrapper(config.TEST.NMS)
    person_idx = classes.index('person')
    scale = 1.0

    # test
    for idx, image_path in enumerate(image_path_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [batch_item[0].shape[2:4] for batch_item in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
        print('{} testing {} {:.4f}s'.format(idx, image_path, toc()))

        # keep only detections above the reporting threshold
        for i, cls_dets in enumerate(dets):
            keep = np.where(cls_dets[:, -1] > det_thresh)
            dets[i] = cls_dets[keep]
            masks[i] = masks[i][keep]

        im = cv2.imread(image_path_list[idx])
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # Same image as above, so its shape is reused instead of decoding
        # the file a second time.
        org_height, org_width = im.shape[:2]

        # dets/masks start at class 1, so index 0 is the first entry of
        # `classes`, i.e. 'person'.
        person_dets = dets[person_idx]
        person_masks = masks[person_idx]
        for det, msk in zip(person_dets, person_masks):
            inst_arr = np.zeros_like(im[:, :, 0])  # 2-D canvas, one per instance
            bbox = det[:4] * scale
            cod = bbox.astype(int)
            if im[cod[1]:cod[3], cod[0]:cod[2], 0].size > 0:
                msk = cv2.resize(
                    msk, im[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1, 0].T.shape)
                bimsk = (msk >= config.BINARY_THRESH).astype('uint8')

                # ------- Create bit-mask for this instance
                inst_arr[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1] = bimsk
                rs_inst_arr = scipy.misc.imresize(inst_arr, (org_height, org_width))
                rle = mask.encode(np.asfortranarray(rs_inst_arr))

                predictions.append({
                    'image_path': image_path,
                    'label': 'person',
                    'segmentation': rle,
                    'bbox': bbox.tolist(),
                    # Plain float: NumPy scalars such as float32 are not
                    # JSON serializable.
                    'score': float(det[-1]),
                })
                del bimsk
                del rs_inst_arr

    print('Created {} predictions'.format(len(predictions)))

    # ---------------------------------------------------------- Write output
    with open(params['outfile'], 'w') as wf:
        json.dump(predictions, wf, indent=2)
def main(video_file):
    """Play a video and overlay R-FCN detections on every 8th frame.

    Frames whose position is not a multiple of the sampling interval are
    shown unchanged; the others are run through ``im_detect``, filtered per
    class with NMS and a 0.7 score threshold, and shown with the boxes drawn
    by ``draw_bbox_on_frame``.  Pressing ``q`` after a processed frame, or
    reaching the end of the stream, stops playback.

    :param video_file: path (or any source accepted by ``cv2.VideoCapture``)
        of the video to process.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    # config.symbol is the literal above, so eval() never sees external input.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param('./output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road', 19, process=True)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    data_names = ['data', 'im_info']
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    # NOTE(review): both sizes read SCALES[0][1]; similar demos use
    # SCALES[0][0] as target size -- confirm this is intentional.
    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]

    # based on testing, this code process every frame takes around 0.25s. So my interval take 0.25s ~= 7frames
    frame_interval = 8

    # The original opened the undefined name `video_path`; the argument is
    # what has to be opened.
    cap = cv2.VideoCapture(video_file)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    predictor = None  # built once, from the first processed frame
    try:
        while cap.isOpened():
            frame_id = cap.get(1)  # property 1: position of the next frame
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: no frame to show or process.
                break
            if frame_id % frame_interval != 0:
                cv2.imshow('video', frame)
                continue

            tic()
            frame, im_scale = resize(frame, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            frame_data = [mx.nd.array(arr) for arr in (im_tensor, im_info)]
            provide_data = [[(k, v.shape) for k, v in zip(data_names, frame_data)]]

            if predictor is None:
                # Creating the predictor for every frame is needlessly
                # expensive; one instance is reused for the whole video.
                predictor = Predictor(sym, data_names, label_names,
                                      context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                      provide_data=provide_data, provide_label=[None],
                                      arg_params=arg_params, aux_params=aux_params)

            # Process video frame
            data_batch = mx.io.DataBatch(data=[frame_data], label=[], pad=0, index=0,
                                         provide_data=provide_data,
                                         provide_label=[None])
            scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]

            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            # Column 0 of the scores is skipped; the remaining columns are
            # handled one class at a time.
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            frame_with_bbox = draw_bbox_on_frame(frame, dets_nms, classes, scale=scales[0])
            cv2.imshow('video', frame_with_bbox)
            print('Processing frame {} in {:.4f}s'.format(frame_id, toc()))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always give the capture device and the window back.
        cap.release()
        cv2.destroyAllWindows()
    print('done')
def main():
    """Run the Accel segmentation demo on Cityscapes snippets and report mIoU.

    Reads ``args.version``, ``args.interval``, ``args.num_ex`` and
    ``args.avg_acc``.  From each 30-frame snippet a window of ``interval``
    frames is selected; the first frame of every window is processed by the
    key-frame predictor and the others by the current-frame predictor, which
    receives the feature returned by the previous ``im_segment`` call.
    Colour-mapped predictions are saved under ``<cur_path>/../demo/deeplab_dff/``
    and, where a ground-truth file matches the frame, IoU statistics are
    accumulated and printed.

    :raises ValueError: on an unsupported version, an interval < 1 or
        num_ex < 1.
    """
    # settings
    num_classes = 19
    snip_len = 30
    version = str(args.version)
    interv = args.interval
    num_ex = args.num_ex
    avg_acc = args.avg_acc

    # validate params
    if version not in ['18', '34', '50', '101']:
        raise ValueError(
            "Invalid Accel version '%s' - must be one of Accel-{18,34,50,101}"
            % version)
    if interv < 1:
        raise ValueError("Invalid interval %d - must be >=1" % interv)
    if num_ex < 1:
        raise ValueError("Invalid num_ex %d - must be >=1" % num_ex)

    # get symbol
    pprint.pprint(config)
    config.symbol = 'accel_' + version
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/accel-' + version
    # version was validated against a fixed list above, so the evaluated
    # string is one of four known names.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    path_demo_data = '/ebs/Accel/data/cityscapes/'
    path_demo_labels = '/ebs/Accel/data/cityscapes/'
    if path_demo_data == '' or path_demo_labels == '':
        raise ValueError("Must set path to demo data + labels")

    # load demo data
    image_names = sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/frankfurt/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/lindau/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/munster/*.png'))
    image_names = image_names[:snip_len * num_ex]
    label_files = sorted(
        glob.glob(path_demo_labels + 'gtFine/val/frankfurt/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/lindau/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/munster/*trainIds.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    # Pick, from every snippet, `interv` consecutive frames positioned
    # relative to lb_pos.
    # presumably lb_pos is the index of the annotated frame in a snippet -- confirm
    lb_pos = 19
    image_names_trunc = []
    for i in range(num_ex):
        snip_pos = i * snip_len
        if avg_acc:
            offset = i % interv
        else:
            offset = interv - 1
        start_pos = lb_pos - offset
        image_names_trunc.extend(
            image_names[snip_pos + start_pos:snip_pos + start_pos + interv])
    image_names = image_names_trunc

    data = []
    key_im_tensor = None
    prev_im_tensor = None
    for idx, im_name in enumerate(image_names):
        # The original mixed '%s' with str.format, so the path never
        # appeared in the message.
        assert os.path.exists(im_name), '%s does not exist' % im_name
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        if prev_im_tensor is None:
            prev_im_tensor = im_tensor
        # 'data_key' carries the previous frame (the frame itself for the
        # very first one); 'feat_key' is a placeholder that is replaced at
        # test time for non-key frames.
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': prev_im_tensor,
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })
        prev_im_tensor = im_tensor

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in data]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)] for sample in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + model1, 0, process=True)
    arg_params_dcn, aux_params_dcn = load_param(cur_path + model2, 0, process=True)
    arg_params.update(arg_params_dcn)
    aux_params.update(aux_params_dcn)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up -- at most two frames; with interval=1 and num_ex=1 only one
    # frame is selected and indexing data[1] would raise IndexError.
    for j in range(min(2, len(data))):
        data_batch = mx.io.DataBatch(
            data=[data[j]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
            provide_label=[None])
        scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]
        if j % key_frame_interval == 0:
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [
                mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                for output in output_all
            ]
    print("warmup done")

    # test
    total_time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]

        tic()
        if idx % key_frame_interval == 0:
            print('\n\nframe {} (key)'.format(idx))
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                for output in output_all
            ]
        else:
            print('\nframe {} (intermediate)'.format(idx))
            # Feed the feature returned by the previous call.
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [
                mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                for output in output_all
            ]

        elapsed = toc()
        total_time += elapsed
        count += 1
        print('testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed, total_time / count))

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        # compute accuracy
        label = None
        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # check if annotation available for frame: the 2nd and 3rd
        # '_'-separated name components must agree
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print('label {}'.format(lb_filename))
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1
        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print('mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2)))
            print('(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2)))

    ious = per_class_iu(hist) * 100
    print(' '.join('{:.03f}'.format(i) for i in ious))
    print('===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2)))
    print('done')
def main():
    """Run the R-FCN detection demo over one ImageNet VID snippet.

    Loads every ``*.JPEG`` frame of the demo snippet, builds a single
    ``Predictor`` for the ``resnet_v1_101_rfcn`` test symbol, warms it up
    on the first frame, then detects on every frame, applies per-class
    GPU NMS with a 0.7 score cut-off and writes the frames with drawn
    boxes to ``<cur_path>/../demo/rfcn/``.

    Relies on module-level names defined elsewhere in the file
    (``config``, ``cur_path``, ``Predictor``, ``im_detect``, ...).
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    # config.symbol is set to a constant just above, so eval() only ever
    # resolves the "<module>.<class>" name of the network definition.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus',
               'car', 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey',
               'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake',
               'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale',
               'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        # The original message mixed '%s' with str.format, so the path was
        # never interpolated into the assertion text.
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: two untimed passes over the first frame
    for j in xrange(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        # im_info is the second input; its third column is the resize scale
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)

    # test
    # (named total_time rather than `time` to avoid shadowing the module name)
    total_time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        total_time += toc()
        count += 1
        # reports the running mean detection time, not the per-frame time
        print('testing {} {:.4f}s'.format(im_name, total_time / count))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # column 0 of `scores` is skipped (loop starts at class index 1)
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        # NOTE(review): the frame is converted to RGB and later saved with
        # cv2.imwrite, which expects BGR -- confirm draw_boxes accounts for
        # this, otherwise the saved colours are channel-swapped.
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
def process_image_fun(imagesPath=None, fileOp=None, vis=None,
                      model_params_list=None, count=0):
    """Detect objects in a single image and hand the result to the renderer.

    ``imagesPath`` is passed straight to ``resize`` and later to
    ``show_boxes_write_rg`` as ``im``.  ``model_params_list`` supplies, in
    order, the symbol, ``arg_params`` and ``aux_params`` used to build the
    predictor.  Per-class detections are kept when their score exceeds the
    smallest value in ``RFCN_DCN_CONFIG['need_label_thresholds']``.

    Returns whatever ``show_boxes_write_rg`` returns.
    """
    # init rfcn dcn detect model (mxnet)
    # model_params_list = init_detect_model()
    # num_classes = RFCN_DCN_CONFIG['num_classes']
    # 0 is background,
    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    min_threshold = min(RFCN_DCN_CONFIG['need_label_thresholds'].values())

    # Pre-process the one input into network tensors.
    first_scale = config.SCALES[0]
    im, im_scale = resize(imagesPath, first_scale[0], first_scale[1],
                          stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    raw_inputs = [{'data': im_tensor, 'im_info': im_info}]
    pending_images = [imagesPath]

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in raw_inputs]
    largest_h = max([s[0] for s in config.SCALES])
    largest_w = max([s[1] for s in config.SCALES])
    max_data_shape = [[('data', (1, 3, largest_h, largest_w))]]
    provide_data = [[(name, arr.shape) for name, arr in zip(data_names, arrays)]
                    for arrays in data]
    provide_label = [None] * len(data)
    predictor = Predictor(model_params_list[0], data_names, label_names,
                          context=[mx.gpu(1)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=model_params_list[1],
                          aux_params=model_params_list[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(pending_images):
        arrays = data[idx]
        data_batch = mx.io.DataBatch(
            data=[arrays], label=[], pad=0, index=idx,
            provide_data=[[(name, arr.shape)
                           for name, arr in zip(data_names, arrays)]],
            provide_label=[None])
        # third column of im_info (input index 1) carries the resize scale
        scales = [inputs[1].asnumpy()[0, 2] for inputs in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')

        dets_nms = []
        for cls in range(1, scores.shape[1]):
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, cls * 4:(cls + 1) * 4]
            candidates = np.hstack((cls_boxes, scores[:, cls, np.newaxis]))
            survivors = candidates[nms(candidates), :]
            dets_nms.append(survivors[survivors[:, -1] > min_threshold, :])

        #print('testing {} {:.4f}s'.format(im_name, toc()))
        im = show_boxes_write_rg(im=im_name, dets=dets_nms, classes=classes,
                                 scale=1, vis=vis, fileOp=fileOp, count=count)
    return im
def Seg(self):
    """Segment ``self.data`` and return the mask image from ``show_masks``.

    Runs two warm-up passes, then one pass whose scores, boxes and masks
    are reduced either by per-class NMS (``config.TEST.USE_MASK_MERGE``
    false) or by ``gpu_mask_voting``.  Afterwards only entries of the
    first result class scoring above 0.8 are kept; every other class is
    filtered with a ``> 1`` threshold.  The rendered result is stored in
    ``self.result`` and returned.
    """
    def build_batch():
        # One batch wrapping self.data, plus the per-input resize scale
        # read from the third column of the second input (im_info).
        batch = mx.io.DataBatch(
            data=[self.data], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(self.data_names, self.data)]],
            provide_label=[None])
        batch_scales = [batch.data[n][1].asnumpy()[0, 2]
                        for n in xrange(len(batch.data))]
        return batch, batch_scales

    # warm up
    for warm_pass in xrange(2):
        data_batch, scales = build_batch()
        _, _, _, _ = im_detect(self.predictor, data_batch, self.data_names,
                               scales, config)

    data_batch, scales = build_batch()
    tic()
    scores, boxes, masks, data_dict = im_detect(
        self.predictor, data_batch, self.data_names, scales, config)
    #print masks #right
    im_shapes = [data_batch.data[n][0].shape[2:4]
                 for n in xrange(len(data_batch.data))]

    if config.TEST.USE_MASK_MERGE:
        masks = masks[0][:, 1:, :, :]
        im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
        im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
        print(im_height, im_width)
        boxes = clip_boxes(boxes[0], (im_height, im_width))
        result_masks, result_dets = gpu_mask_voting(
            masks, boxes, scores[0], self.num_classes, 100, im_width,
            im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
            config.BINARY_THRESH, self.ctx_id[0])
        dets = [result_dets[c] for c in range(1, self.num_classes)]
        masks = [result_masks[c][:, 0, :, :]
                 for c in range(1, self.num_classes)]
    else:
        nms = py_nms_wrapper(config.TEST.NMS)
        per_class_dets = []
        per_class_masks = []
        for j in range(1, self.num_classes):
            indexes = np.where(scores[0][:, j] > 0.7)[0]
            cls_scores = scores[0][indexes, j, np.newaxis]
            cls_masks = masks[0][indexes, 1, :, :]
            # Kept exactly as in the original: any failure while taking
            # the class-agnostic path falls back to the per-class slice.
            try:
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    raise Exception()
            except:
                cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            per_class_dets.append(cls_dets[keep, :])
            per_class_masks.append(cls_masks[keep, :])
        dets = per_class_dets
        masks = per_class_masks

    # Filter every class but the first with "> 1", then the first class
    # with "> 0.8" (same order as before: indices 1.., then 0).
    filter_plan = [(pos, 1) for pos in xrange(1, len(dets))]
    filter_plan.append((0, 0.8))
    for pos, threshold in filter_plan:
        keep = np.where(dets[pos][:, -1] > threshold)
        dets[pos] = dets[pos][keep]
        masks[pos] = masks[pos][keep]

    newmask = show_masks(self.fg, dets, masks, self.classes, config)
    #!!!!!!!! wrong mask
    self.result = newmask
    return newmask
def main():
    """Run the DeepLab (optionally deformable) Cityscapes segmentation demo.

    Picks the deformable or plain DeepLab symbol and weights depending on
    ``args.deeplab_only``, segments the two bundled demo images, saves the
    palettised result as ``seg_<name>.png`` next to them and shows the raw
    image and the segmentation in OpenCV windows (blocking on a key press
    after every image).

    Relies on module-level names defined elsewhere in the file
    (``config``, ``args``, ``cur_path``, ``Predictor``, ...).
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    # config.symbol is one of the two constants above, so eval() only
    # resolves the "<module>.<class>" name of the network definition.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 19

    # load demo data
    image_names = [
        'frankfurt_000001_073088_leftImg8bit.png',
        'lindau_000024_000019_leftImg8bit.png'
    ]
    data = []
    for im_name in image_names:
        # The original message mixed '%s' with str.format, so the path was
        # never interpolated into the assertion text.
        assert os.path.exists(cur_path + '/../demo/' + im_name), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    # only the image tensor is fed to the network; im_info is not an input
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in range(len(data))]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in range(len(data))]
    provide_label = [None for i in range(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up: two untimed passes over the first image
    for j in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [
            mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
            for output in output_all
        ]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])

        tic()
        output_all = predictor.predict(data_batch)
        # per-pixel class index = argmax over the class axis
        output_all = [
            mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
            for output in output_all
        ]
        pallete = getpallete(256)

        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        pure_im_name, ext_im_name = os.path.splitext(im_name)
        segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name + '.png')

        # visualize
        im_raw = cv2.imread(cur_path + '/../demo/' + im_name)
        seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)

    print('done')
def main():
    """Run the impression-network video detection demo and dump features.

    Frames of one ImageNet VID snippet are processed in sorted order.
    Every ``key_frame_interval``-th frame is a key frame: the very first
    frame goes through ``first_predictor``, later key frames through
    ``key_predictor`` (fed with the running ``impression``), and all other
    frames through ``cur_predictor`` (fed with the last key frame's task
    feature).  The task feature of each frame is saved to
    ``features/impression_<idx>.npy`` and the frame with drawn detections
    (score > 0.7 after NMS) is written to the output directory.

    Relies on module-level names defined elsewhere in the file
    (``config``, ``cur_path``, ``Predictor``,
    ``im_detect_impression_online``, ...).
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'impression_network_dynamic_offset_sparse'
    model = '/../local_run_output/impression_dynamic_offset-lr-10000-times-neighbor-4-dense-4'
    # config.symbol is set to a constant just above, so eval() only ever
    # resolves the "<module>.<class>" name of the network definition.
    first_sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym_instance = eval(config.symbol + '.' + config.symbol)()
    cur_sym_instance = eval(config.symbol + '.' + config.symbol)()
    first_sym = first_sym_instance.get_first_test_symbol_impression(config)
    key_sym = key_sym_instance.get_key_test_symbol_impression(config)
    cur_sym = cur_sym_instance.get_cur_test_symbol_impression(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle',
        'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger',
        'train', 'turtle', 'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00011005/*.JPEG')
    output_dir = cur_path + '/../demo/motion-prior-output-00011005/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # np.save below writes into the relative "features" directory and does
    # not create it, so make sure it exists (same pattern as output_dir).
    if not os.path.exists('features'):
        os.makedirs('features')

    key_frame_interval = 10
    image_names.sort()
    data = []
    for idx, im_name in enumerate(image_names):
        # The original message mixed '%s' with str.format, so the path was
        # never interpolated into the assertion text.
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        # Track the previous key frame, the newest key frame and the
        # current frame; the first frame fills all three slots.
        if idx % key_frame_interval == 0:
            if idx == 0:
                data_oldkey = im_tensor.copy()
                data_newkey = im_tensor.copy()
                data_cur = im_tensor.copy()
            else:
                data_oldkey = data_newkey.copy()
                data_newkey = im_tensor
        else:
            data_cur = im_tensor

        shape = im_tensor.shape
        # placeholder feature maps sized at 1/16 of the input resolution
        infer_height = int(np.ceil(shape[2] / 16.0))
        infer_width = int(np.ceil(shape[3] / 16.0))
        data.append({
            'data_oldkey': data_oldkey,
            'data_newkey': data_newkey,
            'data_cur': data_cur,
            'im_info': im_info,
            'impression': np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width)),
            'key_feat_task': np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width))
        })

    # get predictor
    # Order matters: im_info is read at index 3, 'impression' is patched at
    # index -2 and 'key_feat_task' at index -1 below.
    data_names = [
        'data_oldkey', 'data_cur', 'data_newkey', 'im_info', 'impression',
        'key_feat_task'
    ]
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data_oldkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_newkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_cur', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
        ('impression', (1, 1024, 38, 63)),
        ('key_feat_task', (1, 1024, 38, 63))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 4, process=True)
    first_predictor = Predictor(first_sym, data_names, label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up on the first two frames (untimed)
    for j in xrange(2):
        data_batch = mx.io.DataBatch(
            data=[data[j]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
            provide_label=[None])
        scales = [data_batch.data[i][3].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        if j % key_frame_interval == 0:  # keyframe
            if j == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression', impression.shape)
                # Unpack seven values like every other call site; the
                # original six-name unpack here would raise ValueError
                # if this branch were ever reached.
                scores, boxes, data_dict, conv_feat, impression, feat_task, _ = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, _ = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
    print("warmup done")

    # test
    # (named total_time rather than `time` to avoid shadowing the module name)
    total_time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][3].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]

        tic()
        print(idx)
        if idx % key_frame_interval == 0:  # keyframe
            if idx == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
                feat_task_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx), feat_task_numpy)
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression', impression.shape)
                scores, boxes, data_dict, conv_feat, impression, feat_task, _ = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
                feat_task_key_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx), feat_task_key_numpy)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, feat_task_cur = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
            if idx >= 1:
                feat_task_cur_numpy = feat_task_cur.asnumpy()
                np.save("features/impression_%s.npy" % (idx), feat_task_cur_numpy)
        #import pdb;pdb.set_trace()
        # the timed section includes the feature dump to disk
        total_time += toc()
        count += 1
        print('testing {} {:.4f}s'.format(im_name, total_time / count))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
def main():
    """Detect receipt regions with FPN+DCN, crop them and OCR the crops.

    For every image listed in ``ImageSets/image.txt`` the detector is run
    once per entry of ``config.SCALES``; detections of all scales are
    pooled per class, reduced with (soft-)NMS and thresholded by
    ``CONFIDENCE_THRESHOLD``.  Each kept box of a class in
    ``CONCERNED_ERRORS`` is drawn on the image and cropped to disk;
    item, price, row and total crops are then read with Tesseract and
    item/price pairs that occur in the same row are appended to the
    results text file.  Detections are also written in XML form to
    ``output.txt`` and, depending on the ``WRITE_*`` flags, as images and
    Pascal VOC annotations.

    Relies on module-level names defined elsewhere in the file
    (``config``, ``EXPERIMENT_NAME``, ``CLASSES``, ``Predictor``, ...).
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    # config.symbol is set to a constant just above, so eval() only ever
    # resolves the "<module>.<class>" name of the network definition.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    max_per_image = config.TEST.max_per_image

    # Print the test scales
    print("Train scales: %s" % str(config.SCALES))
    print("Test scales: %s" % str(config.TEST_SCALES))

    # load demo data
    #dataBaseDir = '/b_test/pkhan/datasets/Receipts/data/'
    dataBaseDir = '/netscratch/queling/data/'
    outputBaseDir = '/netscratch/queling/Deformable/output/fpn/deep_receipt/results/' + EXPERIMENT_NAME
    #outputBaseDir = '/b_test/pkhan/Code/Deformable/output/' + EXPERIMENT_NAME
    resultsPath = "/netscratch/queling/Deformable/fpn/results.txt"

    # start from an empty output directory on every run
    if os.path.exists(outputBaseDir):
        shutil.rmtree(outputBaseDir)
    os.mkdir(outputBaseDir)

    outputFile = open(os.path.join(outputBaseDir, 'output.txt'), 'w')
    outputFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    errorStatsFile = open(
        os.path.join(outputBaseDir, 'incorrect-detections.txt'), 'w')

    incorrectDetectionResultsPath = os.path.join(outputBaseDir, 'IncorrectDetections')
    if not os.path.exists(incorrectDetectionResultsPath):
        os.mkdir(incorrectDetectionResultsPath)
    detectionResultsPath = os.path.join(outputBaseDir, 'Detections')
    if not os.path.exists(detectionResultsPath):
        os.mkdir(detectionResultsPath)
    annotationResultsPath = os.path.join(outputBaseDir, 'Annotations')
    if not os.path.exists(annotationResultsPath):
        os.mkdir(annotationResultsPath)

    # Per-class / per-IoU counters; only initialised here because the
    # computeStatistics call further down is commented out.
    statistics = {}
    for cls_ind, cls in enumerate(CLASSES):
        statistics[cls] = {}
        for thresh in IoU_THRESHOLDS:
            statistics[cls][thresh] = {}
            statistics[cls][thresh]["truePositives"] = 0
            statistics[cls][thresh]["falsePositives"] = 0
            statistics[cls][thresh]["falseNegatives"] = 0
            statistics[cls][thresh]["precision"] = 0
            statistics[cls][thresh]["recall"] = 0
            statistics[cls][thresh]["fMeasure"] = 0

    # test.txt for whole dataset, image.txt for one.
    # Read the list up front so the file handle is closed deterministically.
    with open(os.path.join(dataBaseDir, 'ImageSets/image.txt'), 'r') as im_names_file:
        im_names = [line.strip() for line in im_names_file]

    for im_name in im_names:
        # print ("Processing file: %s" % (im_name))
        found = False
        for ext in IMAGE_EXTENSIONS:
            im_name_with_ext = im_name + ext
            # 'Images' for whole dataset, 'Test' for one
            im_path = os.path.join(dataBaseDir, 'Test', im_name_with_ext)
            if os.path.exists(im_path):
                found = True
                break
        if not found:
            print("Error: Unable to locate file %s" % (im_name))
            exit(-1)

        # Load GT annotations
        xml_path = os.path.join(dataBaseDir, 'Annotations', im_name + '.xml')
        #gtBBoxes = loadGTAnnotationsFromXML(xml_path)

        tic()
        # one detection pool per class of TOTAL_CLASSES except the first
        dets_nms = [[] for j in range(len(TOTAL_CLASSES) - 1)]
        for testScale in config.SCALES:
            data = []
            im = cv2.imread(im_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = testScale[0]
            max_size = testScale[1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})

            # get predictor
            data_names = ['data', 'im_info']
            label_names = []
            data = [[mx.nd.array(data[i][name]) for name in data_names]
                    for i in xrange(len(data))]
            max_data_shape = [[('data', (1, 3, testScale[0], testScale[1]))]]
            provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                            for i in xrange(len(data))]
            provide_label = [None for i in xrange(len(data))]
            # arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
            # NOTE(review): weights are reloaded and the predictor rebuilt
            # (plus two warm-up passes) for every image and scale; hoisting
            # load_param out of the loops looks safe but needs confirming.
            arg_params, aux_params = load_param(MODEL_PATH, MODEL_EPOCH, process=True)
            predictor = Predictor(sym, data_names, label_names,
                                  context=[mx.gpu(0)],
                                  max_data_shapes=max_data_shape,
                                  provide_data=provide_data,
                                  provide_label=provide_label,
                                  arg_params=arg_params, aux_params=aux_params)

            # warm up
            for j in xrange(2):
                data_batch = mx.io.DataBatch(
                    data=[data[0]], label=[], pad=0, index=0,
                    provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                    provide_label=[None])
                scales = [data_batch.data[i][1].asnumpy()[0, 2]
                          for i in xrange(len(data_batch.data))]
                scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                     data_names, scales, config)

            # test
            image_names = [im_name]  # Way around
            for idx, im_name in enumerate(image_names):
                data_batch = mx.io.DataBatch(
                    data=[data[idx]], label=[], pad=0, index=idx,
                    provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                    provide_label=[None])
                scales = [data_batch.data[i][1].asnumpy()[0, 2]
                          for i in xrange(len(data_batch.data))]
                scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                     data_names, scales, config)
                boxes = boxes[0].astype('f')
                scores = scores[0].astype('f')

                # Multi-scale testing: pool the raw detections of every
                # scale per class; NMS runs once on the pooled set below.
                for j in range(1, scores.shape[1]):
                    cls_scores = scores[:, j, np.newaxis]
                    cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if len(dets_nms[j - 1]) == 0:
                        dets_nms[j - 1] = cls_dets
                    else:
                        # Stack rows. The original `+=` on ndarrays added
                        # coordinates and scores elementwise (or failed on
                        # a shape mismatch) instead of appending boxes.
                        dets_nms[j - 1] = np.vstack((dets_nms[j - 1], cls_dets))

        finalDetections = []
        for clsIter in range(len(dets_nms)):
            # print ("Performing NMS on cls %d with %d boxes" % (clsIter, len(dets_nms[clsIter])))
            if config.TEST.USE_SOFTNMS:
                soft_nms = py_softnms_wrapper(config.TEST.SOFTNMS_THRESH,
                                              max_dets=max_per_image)
                dets_nms[clsIter] = soft_nms(dets_nms[clsIter])
            else:
                nms = py_nms_wrapper(config.TEST.NMS)
                keep = nms(dets_nms[clsIter])
                dets_nms[clsIter] = dets_nms[clsIter][keep, :]
            dets_nms[clsIter] = dets_nms[clsIter][
                dets_nms[clsIter][:, -1] > CONFIDENCE_THRESHOLD, :]

        # if max_per_image > 0:
        #     for idx_im in range(0, num_images):
        #         image_scores = np.hstack([all_boxes[j][idx_im][:, -1]
        #                                   for j in range(1, imdb.num_classes)])
        #         if len(image_scores) > max_per_image:
        #             image_thresh = np.sort(image_scores)[-max_per_image]
        #             for j in range(1, imdb.num_classes):
        #                 keep = np.where(all_boxes[j][idx_im][:, -1] >= image_thresh)[0]
        #                 all_boxes[j][idx_im] = all_boxes[j][idx_im][keep, :]

        print('Processing image: {} {:.4f}s'.format(im_name, toc()))

        # Add detections on the image
        im = cv2.imread(im_path)  # Reload the image since the previous one was scaled
        # running crop counters; `asd` is the number used in the crop's file name
        item = 0
        price = 0
        asd = 0
        row = 0
        for cls_idx, cls_name in enumerate(CONCERNED_ERRORS):
            cls_dets = dets_nms[cls_idx]
            for det in cls_dets:
                predictedBBox = det[:4]
                cv2.rectangle(im,
                              (int(predictedBBox[0]), int(predictedBBox[1])),
                              (int(predictedBBox[2]), int(predictedBBox[3])),
                              (0, 0, 255), 1)
                w = predictedBBox[2] - predictedBBox[0]
                cv2.putText(im, cls_name,
                            (int(predictedBBox[0] + (w / 2.0) - 100),
                             int(predictedBBox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0), 1)
                # the crop is taken after drawing, so it includes the box outline
                crop_im = im[int(predictedBBox[1]):int(predictedBBox[3]),
                             int(predictedBBox[0]):int(predictedBBox[2])]
                gray = cv2.cvtColor(crop_im, cv2.COLOR_BGR2GRAY)

                if cls_name == "price":
                    asd = price + 1
                    price = price + 1
                    new_path = outputBaseDir + "/price/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    cv2.imwrite(outputImagePath, crop_im)
                elif cls_name == "item_name":
                    item = item + 1
                    asd = item
                    new_path = outputBaseDir + "/item/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)
                elif cls_name == "row":
                    row = row + 1
                    asd = row
                    new_path = outputBaseDir + "/row/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)
                elif cls_name == 'total_price':
                    print("Found Total")
                    new_path = outputBaseDir + "/total/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(new_path, cls_name + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)
                elif cls_name == 'header':
                    new_path = outputBaseDir + "/header/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(new_path, cls_name + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                # every crop is additionally written flat into outputBaseDir
                outputImagePath = os.path.join(outputBaseDir, cls_name + str(asd) + ".jpg")
                # print ("Writing image: %s" % (outputImagePath))
                cv2.imwrite(outputImagePath, crop_im)
                text = pytesseract.image_to_string(Image.open(outputImagePath))
                #print(cls_name+": "+text)

        # OCR the numbered crops. Crops are numbered 1..count inclusive,
        # so the ranges end at count + 1 (the original stopped one short
        # and never read the last crop of each class).
        items = []
        for k in range(1, item + 1):
            path_item = outputBaseDir + "/item/item_name" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            #text_item = spellCheck.main(text_item, "product")
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
            else:
                items = items + [text_item]
        print("-------------------------------------------------------------")

        prices = []
        for k in range(1, price + 1):
            path_item = outputBaseDir + "/price/price" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item),
                                                    config="--psm 13")
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
            else:
                prices = prices + [text_item]
        print("-------------------------------------------------------------")

        rows = []
        for k in range(1, row + 1):
            path_item = outputBaseDir + "/row/row" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            if text_item == "":
                print("empty and not relevant")
            else:
                rows = rows + [text_item]
            print(str(k) + ": " + text_item)

        # write total in result.txt
        # NOTE(review): this raises if no total_price crop was written for
        # the run -- confirm whether a missing total should be tolerated.
        path_item = outputBaseDir + "/total/total_price.jpg"
        text_item = pytesseract.image_to_string(Image.open(path_item))
        with open(resultsPath, "a") as f:
            f.write(text_item + "\n")

        #path_item = outputBaseDir+"/header/header.jpg"
        #text_item = pytesseract.image_to_string(Image.open(path_item))
        #print("Header: "+text_item)

        # Pair each item with every price that appears in the same OCR'd
        # row; items that match no row/price are written with a blank price.
        found = False
        for k in range(0, len(items)):
            for l in range(0, len(rows)):
                row_text = rows[l].encode('ascii', 'ignore')
                if items[k].encode('ascii', 'ignore') in row_text:
                    for m in range(0, len(prices)):
                        # Test the price that is about to be written
                        # (prices[m]); the original tested prices[k], which
                        # could be out of range and matched unrelated prices.
                        if prices[m].encode('ascii', 'ignore') in row_text:
                            #items[k] = spellCheck.main(items[k], "product")
                            with open(resultsPath, "a") as f:
                                f.write(items[k] + "\n")
                                f.write(str(prices[m]) + "\n")
                            found = True
            # Product not found in row
            if not found:
                #items[k] = spellCheck.main(items[k], "product")
                with open(resultsPath, "a") as f:
                    f.write(items[k] + "\n")
                    f.write(" " + "\n")
            found = False

        # Add gt annotations
        #for bbox in gtBBoxes:
        #    if bbox[5] in CONCERNED_ERRORS:
        #        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # Computate the statistics for the current image
        #statistics, classificationErrorMessage = computeStatistics(dets_nms, gtBBoxes, statistics, IoU_THRESHOLDS)
        #if classificationErrorMessage is not None:
        #    print ("Writing incorrect image: %s" % (im_name))
        #    errorStatsFile.write("%s: %s\n" % (im_name, classificationErrorMessage))
        #    cv2.imwrite(os.path.join(incorrectDetectionResultsPath, im_name + '.jpg'), im)

        # Write the output in ICDAR Format
        outputFile.write(convertToXML(im_name_with_ext, dets_nms))

        if WRITE_DETECTION_RESULTS:
            # visualize
            # im = cv2.imread(im_path)
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # # Get also the plot for saving on server
            # _, plt = show_boxes(im, dets_nms, CLASSES, 1, returnPlt=True)
            # plt.savefig(os.path.join(outputBaseDir, 'Detections', im_name[:im_name.rfind('.')] + ".png"))
            outputImagePath = os.path.join(detectionResultsPath, im_name + ".jpg")
            print("Writing image: %s" % (outputImagePath))
            cv2.imwrite(outputImagePath, im)

        if WRITE_ANNOTATION_RESULTS:
            exportToPascalVOCFormat(im_name, im_path, dets_nms, annotationResultsPath)

    outputFile.close()
    errorStatsFile.close()

    total_classes = 0
    total_F_Meausere = 0
    average_F_Meausere = 0

    # Compute final precision and recall
    outputFile = open(
        os.path.join(outputBaseDir, 'output-stats-' + EXPERIMENT_NAME + '.txt'),
        'w')
def main():
    """Run the FCIS demo over every ``*.jpg`` file in a directory.

    Command-line options select the YAML config, the trained model, the
    image directory, the minimum detection score, whether per-image results
    are written as JSON into the image directory (``--save``), and whether
    and how the segmentation is displayed (``--novis`` / ``--wait``).
    """
    import argparse

    parser = argparse.ArgumentParser(description='FCIS demo')
    parser.add_argument('--cfg', dest='cfg_file',
                        help='required config file (YAML file)',
                        required=True, type=str)
    parser.add_argument('--model', dest='model',
                        help='path to trained model (.params file)',
                        required=True, type=str)
    parser.add_argument('--img_dir', dest='img_dir',
                        help='path to directory of images for demo',
                        required=True, type=str)
    parser.add_argument('--min_score', dest='min_score',
                        help='Minimum score. Default 0.85',
                        default=0.85, type=float)
    parser.add_argument('--save', dest='save',
                        help='Saves inference data per image as JSON files (stored in img_dir directory)',
                        action='store_true')
    parser.add_argument('--novis', dest='novis',
                        help='Turn off visualization of inference',
                        action='store_true')
    parser.add_argument('--wait', dest='wait',
                        help='Set the wait time in between frames in opencv waitKey() (default 0 i.e. pause until button pressed)',
                        default=0, type=int)
    args = parser.parse_args()

    # load image demo directory
    img_dir = args.img_dir
    assert osp.exists(img_dir), ("Could not find image directory %s" % (img_dir))
    # NOTE(review): `nts` is defined elsewhere; presumably a natural sort of
    # the globbed paths -- confirm.
    image_names = nts(glob.glob(osp.join(img_dir, "*.jpg")))
    if len(image_names) == 0:
        print("No files in %s" % (img_dir))
        return

    # load net
    fcis_net = FCISNet(args.cfg_file, args.model)
    class_names = fcis_net.classes

    # test: run predictions
    conf_thresh = args.min_score
    print("Using min score of %.3f...\n" % (conf_thresh))
    for im_name in image_names:
        im = cv2.imread(im_name, cv2.IMREAD_COLOR)
        if im is None:
            print("Could not read %s" % (im_name))
            continue

        tic()
        dets, masks = fcis_net.forward(im, conf_thresh=conf_thresh)
        print('inference time %s: %.4fs' % (im_name, toc()))

        if args.save:
            # im_name comes from the glob above and therefore already starts
            # with img_dir.  Joining img_dir with the full path again
            # duplicated the directory for relative img_dir values, so join
            # it with the bare file stem instead.
            stem = osp.splitext(osp.basename(im_name))[0]
            json_file = osp.join(img_dir, stem + ".json")
            reformatted_data = reformat_data(dets, masks, class_names)
            write_reformat_data_json(reformatted_data, json_file)

        # vis
        im_out_file = "/home/vincent/hd/deep_learning/tmp/FCIS/%s" % (im_name.split("/")[-1])
        if not args.novis:
            # wait == 0 lets show_masks do the (blocking) display itself;
            # otherwise the frame is shown here with the requested delay.
            plt_show = args.wait == 0
            im_seg = show_masks(im, dets, masks, class_names,
                                config.BINARY_THRESH, show=plt_show)
            im_seg = cv2.resize(im_seg, (960, 540))
            cv2.imwrite(im_out_file, im_seg)
            print("Saved to %s" % (im_out_file))
            if not plt_show:
                cv2.imshow("seg", im_seg)
                cv2.waitKey(args.wait)
    print('\nDONE\n')
def main():
    """Run the FCIS demo on every ``.jpg``/``.png`` file found in ``test_dir``.

    Images are read from ``cur_path + '/../' + test_dir``, pushed through the
    network configured by the module-level ``config``, filtered at a score of
    0.7, drawn with ``show_masks`` and written to
    ``cur_path + '/../' + result_dir`` under the same file name.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = [
        '__background__',  # always index 0
        '1',
    ]

    # load demo data
    input_dir = cur_path + '/../' + test_dir
    image_names = [name for name in os.listdir(input_dir)
                   if name[-4:] == '.jpg' or name[-4:] == '.png']

    data = []
    for im_name in image_names:
        im_path = input_dir + im_name
        # The previous message mixed a '%s' placeholder with str.format(), so
        # the path was never shown (and it named '../demo/' instead of the
        # directory actually read).
        assert os.path.exists(im_path), '{} does not exist'.format(im_path)
        # 128 is the numeric value of cv2.IMREAD_IGNORE_ORIENTATION.
        im = cv2.imread(im_path, cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir, 0,
                                        process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up: two throw-away forward passes on the first image
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]

        tic()
        # NOTE(review): detection runs with a fixed scale of 1.0 and the real
        # scale is only applied by show_masks below (1.0 / scales[0]) --
        # looks deliberate, confirm.
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, [1.0], config)
        im_shapes = [d[0].shape[2:4] for d in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            # A missing CLASS_AGNOSTIC entry is treated as False; this
            # replaces a bare `except:` that was used as control flow.
            try:
                class_agnostic = bool(config.CLASS_AGNOSTIC)
            except (AttributeError, KeyError):
                class_agnostic = False
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100,
                im_shapes[0][1], im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH,
                ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :]
                     for j in range(1, num_classes)]
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize: keep only detections scoring above 0.7
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(input_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img (the channel swap is applied again before writing)
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print('done')
def main():
    """Time the batched Deep-Feature-Flow R-FCN symbol on ``demo/sample/*.JPEG``.

    Frames are grouped into batches of ``key_frame_interval`` images: the
    first frame of each group is the key frame (``data_key``), the rest go
    into ``data_other``.  Each batch is run through ``im_batch_detect`` and
    the running average time per frame is printed.  Drawing of the detections
    is currently disabled (see the commented block in the test loop).
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_batch_test_symbol(config)
    sym.save('dff_rfcn.json')

    # set up class names (only referenced by the disabled visualisation)
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle',
        'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger',
        'train', 'turtle', 'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    # glob returns files in arbitrary order; frames are grouped into
    # key-frame batches by position, so process them in sorted order.
    image_names = sorted(glob.glob(cur_path + '/../demo/sample/*.JPEG'))
    output_dir = cur_path + '/../demo/rfcn_dff_batch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    data = []
    key_im_tensor = None
    cur_im_tensor = []
    im_info_tensor = []
    image_names_list = []
    image_names_batch = []
    for idx, im_name in enumerate(image_names):
        # The previous message mixed '%s' with str.format(), so the file name
        # was never substituted.
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        else:
            cur_im_tensor.append(im_tensor)
        im_info_tensor.append(im_info)
        image_names_batch.append(im_name)

        if (idx + 1) % key_frame_interval == 0 or idx == len(image_names) - 1:
            # NOTE(review): if the final group holds only a key frame,
            # cur_im_tensor is empty and np.concatenate raises -- confirm
            # whether such inputs can occur.
            data.append({
                'data_other': np.concatenate(cur_im_tensor),
                'im_info': np.concatenate(im_info_tensor),
                'data_key': key_im_tensor
            })
            key_im_tensor = None
            cur_im_tensor = []
            im_info_tensor = []
            image_names_list.append(image_names_batch)
            image_names_batch = []

    # get predictor
    data_names = ['data_other', 'im_info', 'data_key']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_height = max([v[0] for v in config.SCALES])
    max_width = max([v[1] for v in config.SCALES])
    max_data_shape = [[
        ('data_other', (key_frame_interval - 1, 3, max_height, max_width)),
        ('data_key', (1, 3, max_height, max_width)),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # Only needed by the disabled visualisation below.
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: one pass over the first batch
    for j in range(1):
        data_batch = mx.io.DataBatch(
            data=[data[j]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[:, 2] for d in data_batch.data]
        print(scales[0].shape)
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)

    print("warmup done")

    # test
    total_time = 0
    count = 0
    for idx, im_names in enumerate(image_names_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[:, 2] for d in data_batch.data]

        tic()
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)
        total_time += toc()
        count += len(scores_all)
        print('testing {} {:.4f}s x {:d}'.format(im_names[0],
                                                 total_time / count,
                                                 len(scores_all)))

        # Disabled visualisation, kept for reference:
        #
        # for batch_idx in xrange(len(scores_all)):
        #     boxes = boxes_all[batch_idx].astype('f')
        #     scores = scores_all[batch_idx].astype('f')
        #     dets_nms = []
        #     for j in range(1, scores.shape[1]):
        #         cls_scores = scores[:, j, np.newaxis]
        #         cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
        #         cls_dets = np.hstack((cls_boxes, cls_scores))
        #         keep = nms(cls_dets)
        #         cls_dets = cls_dets[keep, :]
        #         cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
        #         dets_nms.append(cls_dets)
        #     # visualize
        #     im = cv2.imread(im_names[batch_idx])
        #     im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        #     # show_boxes(im, dets_nms, classes, 1)
        #     out_im = draw_boxes(im, dets_nms, classes, 1)
        #     _, filename = os.path.split(im_names[batch_idx])
        #     cv2.imwrite(output_dir + filename, out_im)

    print('done')
def main():
    """Run the R-FCN COCO demo on the two bundled test images.

    ``args.rfcn_only`` selects the plain R-FCN symbol and weights; otherwise
    the deformable (DCN) variant is used.  Per-class detections are
    NMS-filtered, thresholded at a score of 0.7 and shown with
    ``show_boxes``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = [
        'COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg'
    ]
    demo_dir = cur_path + '/../demo/'
    data = []
    for im_name in image_names:
        # The previous message mixed '%s' with str.format(), so the file name
        # was never substituted.
        assert os.path.exists(demo_dir + im_name), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(demo_dir + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: two throw-away forward passes on the first image
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # class 0 is skipped; every other class is NMS-ed and thresholded
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize
        im = cv2.imread(demo_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print('done')
def process_video_frame(raw_frame_queue, bbox_frame_queue):
    """Consume raw frames from one queue and emit annotated frames on another.

    Runs forever.  Each cycle forwards the first two usable frames resized
    but un-annotated, collects the next 15 frames, runs R-FCN detection on
    them, and puts one annotated frame per collected frame onto
    ``bbox_frame_queue``.  ``None`` entries read from ``raw_frame_queue``
    are ignored.  The cycle time reported by ``toc()`` is printed.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)

    # set up class names; background (label '0') is not listed here
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # NOTE(review): both sizes read SCALES[0][1]; the sibling demos use
    # SCALES[0][0] for target_size -- confirm this is intended.
    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]

    data_names = ['data', 'im_info']
    label_names = []

    while True:
        tic()
        passed_through = 0
        samples = []
        frame_list = []
        while len(samples) < 15:
            frame = raw_frame_queue.get()
            if frame is None:
                continue
            frame, im_scale = resize(frame, target_size, max_size,
                                     stride=config.network.IMAGE_STRIDE)
            if passed_through < 2:
                # the first two frames of a cycle are forwarded as-is
                passed_through += 1
                bbox_frame_queue.put(frame)
                continue
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            samples.append({'data': im_tensor, 'im_info': im_info})
            frame_list.append(frame)

        # get predictor (rebuilt on every cycle)
        data = [[mx.nd.array(sample[name]) for name in data_names]
                for sample in samples]
        max_data_shape = [[('data', (1, 3,
                                     max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                        for item in data]
        provide_label = [None] * len(data)
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        # Process video frames
        data_batch = mx.io.DataBatch(data=data, label=[], pad=0,
                                     provide_data=provide_data,
                                     provide_label=provide_label)
        scales = [entry[1].asnumpy()[0, 2] for entry in data_batch.data]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, config)

        for idx, frame in enumerate(frame_list):
            # NOTE(review): every frame is drawn with the detections at
            # index 0 while the scale is taken per frame -- confirm whether
            # boxes_all[idx] / scores_all[idx] was intended.
            boxes = boxes_all[0].astype('f')
            scores = scores_all[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[:, 4:8]
                else:
                    cls_boxes = boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            annotated = draw_bbox_on_frame(frame, dets_nms, classes,
                                           scale=scales[idx])
            bbox_frame_queue.put(annotated)
        print(toc())
def predict_on_image_names( image_names, config, model_path_id="/home/data/output/resnet_v1_101_coco_fcis_end2end_ohem-nebraska/train-nebraska/e2e", epoch=8): import argparse import os import sys import logging import pprint import cv2 from utils.image import resize, transform import numpy as np # get config os.environ['PYTHONUNBUFFERED'] = '1' os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' os.environ['MXNET_ENABLE_GPU_P2P'] = '0' cur_path = os.path.abspath(".") sys.path.insert( 0, os.path.join(cur_path, '../external/mxnet', config.MXNET_VERSION)) import mxnet as mx print("use mxnet at", mx.__file__) from core.tester import im_detect, Predictor from symbols import * from utils.load_model import load_param from utils.show_masks import show_masks from utils.tictoc import tic, toc from nms.nms import py_nms_wrapper from mask.mask_transform import gpu_mask_voting, cpu_mask_voting # get symbol ctx_id = [int(i) for i in config.gpus.split(',')] sym_instance = eval(config.symbol)() sym = sym_instance.get_symbol(config, is_train=False) # set up class names num_classes = 2 classes = ['cp'] # load demo data data = [] for im_name in image_names: assert os.path.exists(im_name), ('%s does not exist'.format(im_name)) im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) # get predictor data_names = ['data', 'im_info'] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in 
xrange(len(data))] provide_label = [None for i in xrange(len(data))] # loading the last epoch that was trained, 8 arg_params, aux_params = load_param(model_path_id, epoch, process=True) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(ctx_id[0])], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) all_classes = [] all_configs = [] all_masks = [] all_dets = [] all_ims = [] # warm up for i in xrange(2): data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0, provide_data=[[ (k, v.shape) for k, v in zip(data_names, data[0]) ]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config) # test for idx, im_name in enumerate(image_names): data_batch = mx.io.DataBatch( data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] tic() scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config) im_shapes = [ data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data)) ] if not config.TEST.USE_MASK_MERGE: all_boxes = [[] for _ in xrange(num_classes)] all_masks = [[] for _ in xrange(num_classes)] nms = py_nms_wrapper(config.TEST.NMS) for j in range(1, num_classes): indexes = np.where(scores[0][:, j] > 0.7)[0] cls_scores = scores[0][indexes, j, np.newaxis] cls_masks = masks[0][indexes, 1, :, :] try: if config.CLASS_AGNOSTIC: cls_boxes = boxes[0][indexes, :] else: raise Exception() except: cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) all_boxes[j] = cls_dets[keep, :] all_masks[j] = cls_masks[keep, :] dets = [all_boxes[j] for j in range(1, num_classes)] masks = 
[all_masks[j] for j in range(1, num_classes)] else: masks = masks[0][:, 1:, :, :] im_height = np.round(im_shapes[0][0] / scales[0]).astype('int') im_width = np.round(im_shapes[0][1] / scales[0]).astype('int') print(im_height, im_width) boxes = clip_boxes(boxes[0], (im_height, im_width)) result_masks, result_dets = gpu_mask_voting( masks, boxes, scores[0], num_classes, 100, im_width, im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0]) dets = [result_dets[j] for j in range(1, num_classes)] masks = [ result_masks[j][:, 0, :, :] for j in range(1, num_classes) ] print('testing {} {:.4f}s'.format(im_name, toc())) # visualize for i in xrange(len(dets)): keep = np.where(dets[i][:, -1] > 0.7) dets[i] = dets[i][keep] masks[i] = masks[i][keep] all_classes.append(classes) all_configs.append(config) all_masks.append(masks) all_dets.append(dets) im = cv2.imread(im_name) all_ims.append(im) return all_ims, all_dets, all_masks, all_configs, all_classes
def main():
    """Run the Deep-Feature-Flow R-FCN demo on one ImageNet-VID snippet.

    Every ``key_frame_interval``-th frame goes through the key-frame
    predictor, which also returns a feature tensor.  The frames in between
    go through the current-frame predictor, fed with the feature tensor of
    the most recent key frame.  Detections are NMS-filtered, thresholded at
    0.7, drawn and written to ``demo/rfcn_dff/``; they are also shown live
    when the module-level ``enable_cv2_imshow`` flag is set.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle',
        'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger',
        'train', 'turtle', 'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/rfcn_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        # The previous message mixed '%s' with str.format(), so the file name
        # was never substituted.
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': key_im_tensor,
            # placeholder; replaced with the key-frame feature at test time
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'im_info', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_height = max([v[0] for v in config.SCALES])
    max_width = max([v[1] for v in config.SCALES])
    max_data_shape = [[
        ('data', (1, 3, max_height, max_width)),
        ('data_key', (1, 3, max_height, max_width)),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up on the first two frames: one key-frame pass, one current-frame
    # pass (requires at least two input images)
    for j in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[j]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]
        if j % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(
                key_predictor, data_batch, data_names, scales, config)
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(
                cur_predictor, data_batch, data_names, scales, config)

    print("warmup done")

    # test
    total_time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]

        tic()
        mark = ''
        if idx % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(
                key_predictor, data_batch, data_names, scales, config)
            mark = '+'  # flags key frames in the log line
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(
                cur_predictor, data_batch, data_names, scales, config)
        time_elapsed = toc()
        total_time += time_elapsed
        count += 1
        print('testing {} {:.4f}s {:.4f}s {}'.format(
            im_name, total_time / count, time_elapsed, mark))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
        if enable_cv2_imshow:
            cv2.imshow('out_im', out_im)
            cv2.waitKey(1)

    print('done')
def process_one_batch_images_fun(isUrlFlag=False,
                                 one_batch_images_list=None,
                                 init_model_param=None,
                                 fileOp=None,
                                 vis=None):
    """Detect objects in one batch of images and hand the results to show_boxes.

    Args:
        isUrlFlag: forwarded to ``readImage_fun`` and ``show_boxes``.
        one_batch_images_list: image identifiers to process; ``None`` or an
            empty list makes the call a no-op.
        init_model_param: indexable of ``(symbol, arg_params, aux_params)``
            used to build the predictor.
        fileOp: forwarded to ``show_boxes``.
        vis: forwarded to ``show_boxes``.

    Returns:
        None.  Images that cannot be read or do not have three channels are
        reported on stdout and skipped.
    """
    # The default argument is None, on which len() would raise.
    if one_batch_images_list is None or len(one_batch_images_list) == 0:
        return
    image_names = one_batch_images_list
    classes = RFCN_DCN_CONFIG['num_classes_name_list']

    all_can_read_image = []
    data = []
    for im_name in image_names:
        im = readImage_fun(isUrlFlag=isUrlFlag, imagePath=im_name)
        # check whether this image is readable
        if np.shape(im) == ():
            print("ReadImageError : %s" % (im_name))
            continue
        # a 2-D (single-channel) array has no shape[2]; treat it as "not 3"
        if im.ndim != 3 or im.shape[2] != 3:
            print("%s channel is not 3" % (im_name))
            continue
        all_can_read_image.append(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    if not data:
        # Nothing usable in this batch; skip building a predictor with empty
        # provide_data.
        return

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]
    predictor = Predictor(init_model_param[0], data_names, label_names,
                          context=[mx.gpu(int(args.gpuId))],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=init_model_param[1],
                          aux_params=init_model_param[2])
    # NOTE(review): NMS is pinned to device 0 while the predictor uses
    # args.gpuId -- confirm this is intended.
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [d[1].asnumpy()[0, 2] for d in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # class 0 is background and is skipped
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(isUrlFlag=isUrlFlag, im_name=im_name, dets=dets_nms,
                   classes=classes, scale=1, vis=vis, fileOp=fileOp,
                   flag=args.outputFileFlag)

    print('process one batch images done')
def main():
    """Run the FCIS COCO demo on the bundled images and print the detections.

    Builds the network named by ``config.symbol``, loads the
    ``fcis_coco`` parameters, runs every demo image through a CPU
    ``Predictor`` and prints one line per detection whose score exceeds 0.7.
    Depending on ``config.TEST.USE_MASK_MERGE`` the raw outputs are either
    filtered per class with NMS or merged with ``cpu_mask_voting``.

    Relies on names defined elsewhere in this module (``config``,
    ``cur_path``, ``resize``, ``transform``, ``load_param``, ``Predictor``,
    ``im_detect``, ...).
    """
    score_thresh = 0.7

    # get symbol
    pprint.pprint(config)
    # NOTE(review): eval() executes whatever string the configuration holds;
    # only run this with a trusted config.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names; index 0 of the network output is the background,
    # so classes[i] names network class i + 1.
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
        'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
        'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
        'hair drier', 'toothbrush',
    ]

    # load demo data
    image_names = [
        'COCO_test2015_000000000275.jpg',
        'COCO_test2015_000000001412.jpg',
        'COCO_test2015_000000073428.jpg',
        'COCO_test2015_000000393281.jpg',
    ]
    demo_dir = cur_path + '/../demo/'
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        im_path = demo_dir + im_name
        # The original message mixed a %-placeholder with str.format() and
        # therefore never showed the file name.
        assert os.path.exists(im_path), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, arrays)]
                    for arrays in data]
    provide_label = [None] * len(data)
    # The epoch (0) has to precede the keyword arguments: a positional
    # argument after ``convert=True`` is a syntax error.
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco', 0,
                                        convert=True, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.cpu()],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # A missing CLASS_AGNOSTIC setting is treated as "not class agnostic".
    class_agnostic = getattr(config, 'CLASS_AGNOSTIC', False)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0,
                                     index=idx,
                                     provide_data=[provide_data[idx]],
                                     provide_label=[None])
        # The scale factor is the third entry of each im_info row.
        scales = [arrays[1].asnumpy()[0, 2] for arrays in data_batch.data]

        tic()
        scores, boxes, masks, data_dict = im_detect(
            predictor, data_batch, data_names, scales, config)
        im_shapes = [arrays[0].shape[2:4] for arrays in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > score_thresh)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            # Size of the image before it was rescaled for the network.
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print((im_height, im_width))
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = cpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH)
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :]
                     for j in range(1, num_classes)]
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize: keep the confident detections and list them
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > score_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        for cls_name, cls_dets in zip(classes, dets):
            for det in cls_dets:
                print('{name}: {score} ({loc})'.format(
                    name=cls_name, score=det[-1], loc=det[:-1].tolist()))
    print('done')
def Pcs_each_vdo(vid_list, sv_dir, GPU): # process each_video for order, ec_vid in enumerate(vid_list): ec_vid = str(ec_vid) tic() fm_lis = glob.glob(ec_vid + '/*') fm_num = len(fm_lis) # OK count right vd_name = ec_vid.split('/')[-1] sv_ph = sv_dir + '/' + vd_name # judge whether the json exist json_fl = sv_ph + '.json' # if exist and the key length is frame then PASS if os.path.exists(json_fl): with open(json_fl, 'r') as load_f: load_file = json.load(load_f) det_frm_nm = len(list(load_file.keys())) if det_frm_nm == fm_num: print 'the file name ', order + 1, ' ', vd_name, ' Exist OK Passed' continue print 'The next vid name: ', vd_name, ' Frm num: ', fm_num jpg_bs_name = fm_lis[0][:-10] frm_list = [ jpg_bs_name + '%06d.jpg' % No for No in range(1, fm_num + 1) ] ft_im = cv2.imread(frm_list[0], 1 | 128) ft_im, im_scale = resize(ft_im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tenssp = transform(ft_im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tenssp.shape[2], im_tenssp.shape[3], im_scale]], dtype=np.float32) data = [] # for fm_dir in frm_list: # to change back to change back to change back to change back to change back to change back to change back # ==================== get each video frames and generate data ==================== for fm_dir in frm_list: fm = cv2.imread(fm_dir, 1 | 128) #target_size #max_size # im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) fm, _ = resize(fm, target_size, max_size, stride=config.network.IMAGE_STRIDE) fm_sptensor = transform(fm, config.network.PIXEL_MEANS) # im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': fm_sptensor, 'im_info': im_info}) for x in locals().keys(): del locals()[x] Memery = gc.collect() del fm_sptensor, fm Memery = gc.collect() # process data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] provide_data = [[(k, v.shape) for k, v in zip(data_names, 
data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(GPU)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) # nms discleared PASSED # test # ==================== process each to get bbox ==================== each_video_det = {} for idx, im_name in enumerate(frm_list): data_batch = mx.io.DataBatch( data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4: (j + 1 ) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] if len(cls_dets) > 0: dets_nms.append( np.insert(cls_dets, 5, values=j, axis=1).tolist()) each_video_det[im_name.split('/')[-1][-10:-4]] = dets_nms # print 'testing {} {:.4f}s'.format(im_name, toc()) # # visualize # im = cv2.imread(im_name) # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) # show_boxes(im, dets_nms, classes, 1) save_each_video_det(video_det=each_video_det, save_dir=sv_dir, video_name=vd_name) print order + 1, '/', len(vid_list), 'time {:.4f}s'.format( toc()), vd_name
def inference_rcnn_AICity(cfg, dataset, image_set, root_path, dataset_path,
                          ctx, prefix, epoch, vis, ignore_cache, shuffle,
                          has_rpn, proposal, thresh, logger=None,
                          output_path=None):
    """Run detection over an image database and dump the boxes to text files.

    Builds the symbol named by ``cfg.symbol``, loads the parameters stored
    under ``prefix``/``epoch``, iterates over a ``TestLoader`` and, for each
    batch, applies per-class NMS and a fixed 0.2 score threshold, shows the
    boxes with ``show_boxes`` and appends one line per detection
    (``class x1 y1 x2 y2 score``) to a time-stamped file in ``data/output``.

    Args:
        cfg: Configuration object (``symbol``, ``SCALES``, ``TEST`` and
            ``CLASS_AGNOSTIC`` are read).
        dataset: Name of the imdb class; resolved with ``eval``.
        image_set, root_path, dataset_path: Passed to the imdb constructor.
        ctx: List of MXNet contexts; its length is the loader batch size.
        prefix, epoch: Passed to ``load_param``.
        vis, ignore_cache: Not used by this function.
        shuffle: Passed to ``TestLoader``.
        has_rpn: Selects ``get_symbol`` (true) or ``get_symbol_rfcn``
            (false) and whether proposals come from ``proposal``.
        proposal: Prefix of the imdb ``<proposal>_roidb`` method used when
            ``has_rpn`` is false.
        thresh: Not used; the applied score threshold is the fixed 0.2.
        logger: Required logger.
        output_path: Passed to the imdb constructor as ``result_path``.

    Raises:
        AssertionError: If ``logger`` is missing.
    """
    import datetime

    if not logger:
        # Raised explicitly so the check also holds when asserts are
        # stripped with -O.
        raise AssertionError('require a logger')

    score_threshold = 0.2  # confidence threshold between 0 and 1

    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and testing data
    # NOTE(review): eval() executes the configured strings; only use with
    # trusted configuration values.
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    if has_rpn:
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym = sym_instance.get_symbol_rfcn(cfg, is_train=False)
    imdb = eval(dataset)(image_set, root_path, dataset_path,
                         result_path=output_path)
    gt_roidb = imdb.gt_roidb_Shuo()
    if has_rpn:
        roidb = gt_roidb
    else:
        roidb = getattr(imdb, proposal + '_roidb')(gt_roidb)
    print('len(roidb): {}'.format(len(roidb)))

    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle,
                           has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    print('inferring:  {}  epoch:  {}'.format(prefix, epoch))

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        # NOTE(review): this appends to the outer list, not to the
        # per-device list that holds the 'data' entry - confirm intended.
        max_data_shape.append(
            ('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(cfg.TEST.NMS, 0)

    # start detection
    print('test_data.size {}'.format(test_data.size))
    print('test_data: {}'.format(test_data))
    print('data_names: {}'.format(data_names))
    print('test_data.provide_data: {}'.format(test_data.provide_data))
    print('test_data.provide_label: {}'.format(test_data.provide_label))

    classes = ['Car', 'SUV', 'SmallTruck', 'MediumTruck', 'LargeTruck',
               'Pedestrian', 'Bus', 'Van', 'GroupOfPeople', 'Bicycle',
               'Motorcycle']

    output_dir = os.path.join('data', 'output')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for nnn, (im_info, data_batch) in enumerate(test_data):
        print(nnn)
        # The roidb is indexed by batch number and only the first result of
        # a batch is used below.
        image_name = roidb[nnn]['image']
        tic()
        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)
        boxes = boxes_all[0].astype('f')
        scores = scores_all[0].astype('f')
        dets_nms = []
        # Column 0 of the scores is skipped (background).
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if cfg.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > score_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(image_name, toc()))

        # visualize
        im = cv2.imread(image_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

        # The file name has one-second resolution, so frames handled within
        # the same second are appended to the same file.
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        output_file = os.path.join(output_dir, stamp + '.txt')
        # ``with`` closes the handle; it used to leak once per frame.
        with open(output_file, 'a') as thefile:
            # zip() stops at the shorter sequence, so a model with fewer
            # classes than names no longer raises IndexError.
            for cls_name, cls_dets in zip(classes, dets_nms):
                for x_small, y_small, x_large, y_large, prob in cls_dets:
                    # Keep every box at least 0.01 wide and high.
                    fields = [cls_name, str(x_small), str(y_small),
                              str(max(x_small + 0.01, x_large)),
                              str(max(y_small + 0.01, y_large)),
                              str(prob)]
                    thefile.write(' '.join(fields) + '\n')
            thefile.write("----next frame----")
def main():
    """Run the R-FCN ImageNet-VID demo on one clip and save annotated frames.

    Sets ``config.symbol`` to ``'resnet_v1_101_rfcn'``, loads the
    ``rfcn_vid`` parameters, runs every ``*.JPEG`` frame of the demo clip
    through a GPU ``Predictor``, draws the detections scoring above 0.7 with
    ``draw_boxes``, shows each frame in a window (waiting for a key press)
    and writes it to ``demo/rfcn/``.

    Relies on names defined elsewhere in this module (``config``,
    ``cur_path``, ``resize``, ``transform``, ``load_param``, ``Predictor``,
    ``im_detect``, ``draw_boxes``, ...).
    """
    import datetime

    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    # NOTE(review): eval() resolves the class from a string; the string is
    # the constant assigned just above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names (the background class is not listed)
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle',
        'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger',
        'train', 'turtle', 'watercraft', 'whale', 'zebra',
    ]

    # load demo data; glob() returns paths in arbitrary order, so sort them
    # to process the frames in sequence.
    image_names = sorted(
        glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG'))
    if not image_names:
        # The warm-up below needs at least one frame.
        print('no demo frames found')
        return
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        # The original message mixed a %-placeholder with str.format() and
        # therefore never showed the file name.
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name)
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, arrays)]
                    for arrays in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    def make_batch(index):
        # Wrap one prepared image as a single-item DataBatch.
        return mx.io.DataBatch(data=[data[index]], label=[], pad=0,
                               index=index,
                               provide_data=[provide_data[index]],
                               provide_label=[None])

    # warm up
    for _ in range(2):
        data_batch = make_batch(0)
        scales = [arrays[1].asnumpy()[0, 2] for arrays in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)

    # test
    total_time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = make_batch(idx)
        # The scale factor is the third entry of each im_info row.
        scales = [arrays[1].asnumpy()[0, 2] for arrays in data_batch.data]

        tic()
        time_old = datetime.datetime.now()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        total_time += toc()
        count += 1
        # Running average of the detection time.
        print('testing {} {:.4f}s'.format(im_name, total_time / count))
        # timedelta.microseconds is only the sub-second component; use the
        # full duration so values of one second or more are not truncated.
        elapsed = datetime.datetime.now() - time_old
        print('requests', int(elapsed.total_seconds() * 1e6))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of the scores is skipped (background).
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        cv2.imshow("asdf", out_im)
        cv2.waitKey(0)  # blocks until a key is pressed
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
def main():
    """Run the R-FCN / Deformable R-FCN COCO demo on the bundled images.

    ``args.rfcn_only`` selects the plain R-FCN symbol and parameters,
    otherwise the deformable variant is used.  Every demo image is run
    through a GPU ``Predictor`` (after two warm-up passes on the first
    image); detections scoring above 0.7 after per-class NMS are displayed
    with ``show_boxes``.

    Relies on names defined elsewhere in this module (``config``, ``args``,
    ``cur_path``, ``resize``, ``transform``, ``load_param``, ``Predictor``,
    ``im_detect``, ``show_boxes``, ...).
    """
    # get symbol
    pprint.pprint(config)
    if args.rfcn_only:
        config.symbol = 'resnet_v1_101_rfcn'
        model_name = 'rfcn_coco'
    else:
        config.symbol = 'resnet_v1_101_rfcn_dcn'
        model_name = 'rfcn_dcn_coco'
    # NOTE(review): eval() resolves the class from a string; the string is
    # one of the two constants chosen above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (the background class is not listed)
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
        'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
        'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
        'hair drier', 'toothbrush',
    ]

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg',
                   'COCO_test2015_000000001669.jpg']
    demo_dir = cur_path + '/../demo/'
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        im_path = demo_dir + im_name
        # The original message mixed a %-placeholder with str.format() and
        # therefore never showed the file name.
        assert os.path.exists(im_path), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, arrays)]
                    for arrays in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + '/../model/' + model_name,
                                        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    def make_batch(index):
        # Wrap one prepared image as a single-item DataBatch.
        return mx.io.DataBatch(data=[data[index]], label=[], pad=0,
                               index=index,
                               provide_data=[provide_data[index]],
                               provide_label=[None])

    # warm up
    for _ in range(2):
        data_batch = make_batch(0)
        scales = [arrays[1].asnumpy()[0, 2] for arrays in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = make_batch(idx)
        # The scale factor is the third entry of each im_info row.
        scales = [arrays[1].asnumpy()[0, 2] for arrays in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Column 0 of the scores is skipped (background).
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize
        im = cv2.imread(demo_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print('done')
def main():
    """Run the DeepLab / Deformable DeepLab Cityscapes segmentation demo.

    ``args.deeplab_only`` selects the plain DeepLab symbol and parameters,
    otherwise the deformable variant is used.  Each demo image is run
    through a GPU ``Predictor`` (after two warm-up passes on the first
    image); the per-pixel argmax of ``softmax_output`` is saved as a
    palettised PNG named ``seg_<image name>.png`` in the demo directory and
    shown next to the raw image until a key is pressed.

    Relies on names defined elsewhere in this module (``config``, ``args``,
    ``cur_path``, ``resize``, ``transform``, ``load_param``, ``Predictor``,
    ``getpallete``, ...).
    """
    # get symbol
    pprint.pprint(config)
    if args.deeplab_only:
        config.symbol = 'resnet_v1_101_deeplab'
        model_name = 'deeplab_cityscapes'
    else:
        config.symbol = 'resnet_v1_101_deeplab_dcn'
        model_name = 'deeplab_dcn_cityscapes'
    # NOTE(review): eval() resolves the class from a string; the string is
    # one of the two constants chosen above.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    image_names = ['frankfurt_000001_073088_leftImg8bit.png',
                   'lindau_000024_000019_leftImg8bit.png']
    demo_dir = cur_path + '/../demo/'
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data_names = ['data']
    label_names = ['softmax_label']
    data = []
    for im_name in image_names:
        im_path = demo_dir + im_name
        # The original message mixed a %-placeholder with str.format() and
        # therefore never showed the file name.
        assert os.path.exists(im_path), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, _ = resize(im, target_size, max_size,
                       stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # Only the image tensor is fed to the network ('data' is the single
        # data name), so no im_info entry is built.
        data.append([mx.nd.array(im_tensor)])

    # get predictor
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, arrays)]
                    for arrays in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + '/../model/' + model_name,
                                        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    def segment(index):
        # Predict one prepared image and return the per-pixel class maps.
        data_batch = mx.io.DataBatch(data=[data[index]], label=[], pad=0,
                                     index=index,
                                     provide_data=[provide_data[index]],
                                     provide_label=[None])
        outputs = predictor.predict(data_batch)
        return [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
                for output in outputs]

    # warm up
    for _ in range(2):
        segment(0)

    # The palette does not depend on the image; build it once.
    pallete = getpallete(256)

    # test
    for idx, im_name in enumerate(image_names):
        tic()
        output_all = segment(idx)
        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        pure_im_name = os.path.splitext(im_name)[0]
        seg_path = demo_dir + 'seg_' + pure_im_name + '.png'
        segmentation_result.save(seg_path)

        # visualize
        im_raw = cv2.imread(demo_dir + im_name)
        seg_res = cv2.imread(seg_path)
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)  # blocks until a key is pressed

    print('done')