def evaluate(evl, solver, net):
    """Train on the first frame, then score candidates on one more frame.

    The first frame returned by ``evl.init_frame()`` is sampled 5500 times and
    every sample becomes its own database entry for ``solver``, which is then
    stepped 1000 times.  A single follow-up frame is fetched, sampled
    ``IMS_PER_FRAME`` times, and each sample is pushed through ``net``; the
    value ``out[1]`` of the ``'cls_prob'`` output is stored per sample.  The
    scores are not returned or used further here.
    """
    # Initialize the net with the first frame
    im_path, gt = evl.init_frame()
    im = cv2.imread(im_path)
    samples = gaussian_sample(im, gt, PARAMS, 5500)
    db = [
        {'path': im_path, 'img': im, 'gt': gt, 'samples': [candidate]}
        for candidate in samples
    ]
    solver.net.layers[0].get_db(db)
    solver.step(1000)

    for _ in range(1):
        im_path = evl.next_frame()
        im = cv2.imread(im_path)
        samples = gaussian_sample(im, gt, PARAMS, IMS_PER_FRAME)
        scores = np.zeros(IMS_PER_FRAME, dtype=np.float64)
        for idx in range(IMS_PER_FRAME):
            entry = {
                'path': im_path,
                'img': im,
                'gt': gt,
                'samples': [samples[idx]],
            }
            data = get_next_mini_batch([entry])['data']
            net.blobs['data'].reshape(*data.shape)
            probs = net.forward(data=data)['cls_prob']
            scores[idx] = probs[1]
def train_bbox_regression(net, im, gt, frame_samples):
    """Fit a bounding-box regressor on the positive samples of one frame.

    Every sample whose ``'label'`` equals 1 is run through ``net``; the
    ``pool3`` activation of that forward pass is flattened into one feature
    row.  The stacked rows, the matching box records and ``gt`` are passed to
    ``train`` on a fresh ``bbox_reg`` instance.

    Args:
        net: Network object; only ``net.blobs`` and ``net.forward`` are used.
        im: Image placed under ``'img'`` in each mini-batch entry.
        gt: Ground-truth box, forwarded unchanged to ``regressor.train``.
        frame_samples: Iterable of dicts carrying ``'label'``, ``'box'`` and
            ``'overlap'`` keys.

    Returns:
        The trained ``bbox_reg`` instance.  When no sample is positive the
        regressor is trained with ``X=None`` and an empty box list.
    """
    rows = []
    bboxes = []
    for sample in frame_samples:
        if sample['label'] != 1:
            continue
        blob = get_next_mini_batch([{'img': im, 'samples': [sample]}])
        data = blob['data'].astype(np.float32, copy=True)
        net.blobs['data'].reshape(*data.shape)
        net.forward(data=data)
        # Take a private copy: the blob array may be a view onto memory the
        # network reuses on the next forward pass (pycaffe behaves this way),
        # which would silently overwrite rows collected earlier.
        feat = np.array(net.blobs['pool3'].data[0], copy=True)
        rows.append(feat.reshape((1, feat.size)))
        bboxes.append({
            'box': sample['box'],
            'label': 1,
            'overlap': sample['overlap']
        })

    # Stack once at the end instead of re-allocating the matrix per sample.
    X = np.vstack(rows) if rows else None

    regressor = bbox_reg()
    regressor.train(X, bboxes, gt)
    return regressor
def evaluate(evl, solver, net):
    """Track a target through a sequence with periodic online fine-tuning.

    The first frame from ``evl.init_frame()`` is sampled and used to train
    ``solver`` for 5000 steps (and, if ``BBOX_REG`` is set, a box regressor).
    Each following frame is sampled around the previous box, every sample is
    scored with ``net``, and the best-scoring box is reported via
    ``evl.report`` and becomes the reference box for the next frame.

    Args:
        evl: Sequence driver providing ``init_frame``, ``next_frame``,
            ``report`` and ``get_ground_truth``.
        solver: Solver whose first layer accepts a sample database.
        net: Network used for scoring and feature extraction.
    """
    im_path, gt = evl.init_frame()
    im = cv2.imread(im_path)
    frame_samples = gaussian_sample(im, gt, PARAMS, INIT_TRAIN_FRAME)

    # Build the initial training database from the first-frame samples (one
    # entry per sample).
    db = [
        {'path': im_path, 'img': im, 'samples': [sample]}
        for sample in frame_samples
    ]
    solver.net.layers[0].get_db(db)
    solver.step(5000)

    if BBOX_REG:
        regressor = train_bbox_regression(net, im, gt, frame_samples)

    long_term = [0]
    short_term = [0]
    term = 0

    # Begin testing
    im_path = evl.next_frame()
    while im_path is not None:
        term += 1
        im = cv2.imread(im_path)
        samples = gaussian_sample(im, gt, PARAMS, IMS_PER_FRAME)
        # NOTE(review): frame_samples starts as the flat list of first-frame
        # samples and then receives one list per later frame -- confirm this
        # mixed layout is what finetune() expects.
        frame_samples.append(samples)

        scores = np.zeros(IMS_PER_FRAME, dtype=np.float64)
        feats = []
        for i in range(IMS_PER_FRAME):
            db = [{'path': im_path, 'img': im, 'samples': [samples[i]]}]
            blob = get_next_mini_batch(db)
            blob = {'data': blob['data']}
            net.blobs['data'].reshape(*blob['data'].shape)
            out = net.forward(**blob)['cls_prob']
            scores[i] = out[1]
            # Copy the activation: the blob array may be a view onto memory
            # that the next forward pass overwrites (as in pycaffe), which
            # would make every stored feature equal to the last one.
            feats.append(np.array(net.blobs['pool3'].data[0], copy=True))

        ind = np.argmax(scores)
        score = scores[ind]
        box = np.array(samples[ind]['box']).reshape((1, 4))

        if score > threshold:
            # Confident frame: remember it in both history windows.
            long_term.append(term)
            short_term.append(term)
            if len(long_term) >= 100:
                long_term = long_term[-100:]
            if len(short_term) >= 20:
                short_term = short_term[-20:]

        # NOTE(review): nesting reconstructed from flattened source; the
        # regression step is kept at loop level -- confirm it was not meant
        # to run only for confident frames.
        feat = feats[ind]
        feat = feat.reshape((1, feat.size))
        if BBOX_REG:
            box = regressor.predict(feat, box)

        if score < threshold:
            finetune(solver, frame_samples, short_term)
        elif term % 10 == 0:
            finetune(solver, frame_samples, long_term)

        evl.report(box.reshape((4, )))
        gt = box.reshape((4, ))
        if VISUAL:
            ground_truth = evl.get_ground_truth()
            vis_detection(im_path, ground_truth, gt)
        im_path = evl.next_frame()
def evaluate(evl, solver, net, sample_num):
    """Track a target through a sequence and record timing and accuracy.

    The first frame is sampled for positives and negatives, which train
    ``solver`` for ``INIT_TRAIN_FRAME`` steps (plus a box regressor when
    ``BBOX_REG`` is set).  For every later frame, ``sample_num`` candidates
    are drawn around the previous box and scored with ``net``; ``mean``
    combines the ranked candidates into a score and a target location.
    Confident frames contribute fresh training samples, unconfident frames
    trigger short-term fine-tuning and widen the first two ``TEST_PARAMS``
    entries, and every 20th frame triggers long-term fine-tuning.  Tracking
    stops when the sequence ends or after 200 frames.

    Args:
        evl: Sequence driver providing ``init_frame``, ``next_frame``,
            ``report`` and ``get_mAP``.
        solver: Solver whose first layer accepts a sample database.
        net: Network used for scoring and feature extraction.
        sample_num: Number of candidates requested per test frame; also used
            as the key when writing to ``record``.
    """
    frame_samples = []

    timer.tic()
    im_path, gt = evl.init_frame()
    im = cv2.imread(im_path)
    pos_samples = mdnet_sample(im, gt, INIT_POS_PARAMS, 1000, stype='TEST')
    pos_samples = [sample for sample in pos_samples if sample['label'] == 1]
    neg_samples = mdnet_sample(im, gt, INIT_NEG_PARAMS, 5000, stype='TEST')
    neg_samples = [sample for sample in neg_samples if sample['label'] == 0]
    samples = pos_samples + neg_samples
    frame_samples.append(samples)

    db = [{'img': im, 'samples': [sample]} for sample in samples]
    solver.net.layers[0].get_db(db)
    solver.step(INIT_TRAIN_FRAME)
    timer.toc()
    print('Pre-training takes {} seconds.'.format(timer.diff))

    if BBOX_REG:
        timer.tic()
        regressor = train_bbox_regression(net, im, gt, samples)
        timer.toc()
        print('BBox regression training takes {} seconds.'.format(timer.diff))

    long_term = [0]
    short_term = [0]
    finetune_iter_ = 0
    # long_term = []
    # short_term = []
    term = 0

    # Begin testing
    total_timer = Timer()
    total_timer.tic()
    im_path = evl.next_frame()
    while im_path is not None:
        print('--------------------------------------------')
        timer.tic()
        term += 1
        im = cv2.imread(im_path)
        samples = mdnet_sample(im, gt, TEST_PARAMS, sample_num, stype='TEST')

        scores = np.zeros(len(samples), dtype=np.float64)
        feats = []
        for i, sample in enumerate(samples):
            db = [{'path': im_path, 'img': im, 'samples': [sample]}]
            blob = get_next_mini_batch(db)
            blob = {'data': blob['data']}
            net.blobs['data'].reshape(*blob['data'].shape)
            out = net.forward(**blob)['cls_prob']
            scores[i] = out[0, 1]
            # Copy the activation: the blob array may be a view onto memory
            # that the next forward pass overwrites (as in pycaffe), which
            # would make every stored feature equal to the last one.
            feats.append(np.array(net.blobs['pool3'].data[0], copy=True))

        ind = np.argmax(scores)
        topInds = np.argsort(scores)[::-1]
        score, targetLoc = mean(topInds, scores, samples)
        # score = scores[topInds[0]]
        # targetLoc = samples[topInds[0]]['box']
        box = np.array(targetLoc)[np.newaxis, :]

        if score > threshold:
            # Confident frame: reset the search spread and harvest new
            # training samples around the estimated location.
            TEST_PARAMS[0] = 0.1
            TEST_PARAMS[1] = 0.1
            long_term.append(term)
            short_term.append(term)
            pos_samples = mdnet_sample(im, targetLoc, POS_PARAMS, 100,
                                       stype='TEST')
            pos_samples = [
                sample for sample in pos_samples if sample['label'] == 1
            ]
            neg_samples = mdnet_sample(im, targetLoc, NEG_PARAMS, 400,
                                       stype='TEST')
            # Filter the negatives themselves; filtering pos_samples for
            # label 0 here would discard every positive and keep the
            # negatives unfiltered.
            neg_samples = [
                sample for sample in neg_samples if sample['label'] == 0
            ]
            frame_samples.append(pos_samples + neg_samples)
            if len(long_term) >= 100:
                long_term = long_term[-100:]
            if len(short_term) >= 10:
                short_term = short_term[-10:]

        # NOTE(review): nesting reconstructed from flattened source; the
        # regression step is kept at loop level -- confirm it was not meant
        # to run only for confident frames.
        feat = feats[ind]
        feat = feat.reshape((1, feat.size))
        if BBOX_REG:
            # print 'pre:', box
            box = regressor.predict(feat, box)
            # print 'after:', box
        timer.toc()
        print('Prediction takes {} seconds'.format(timer.diff))
        print('score: {}'.format(score))

        if score < threshold:
            finetune_iter_ += 1
            # Widen the search spread by 10% after an unconfident frame.
            TEST_PARAMS[0] = 1.1 * TEST_PARAMS[0]
            TEST_PARAMS[1] = 1.1 * TEST_PARAMS[1]
            finetune(solver, frame_samples, short_term)
            # Placeholder entry so this frame still occupies a slot.
            # NOTE(review): a frame with score == threshold appends nothing,
            # so frame_samples can fall out of step with `term` -- confirm.
            frame_samples.append([])
        elif term % 20 == 0:
            finetune_iter_ += 1
            finetune(solver, frame_samples, long_term)
            record._save_json()

        overlap_ = evl.report(box.reshape((4, )))
        gt = box.reshape((4, ))
        record.add_overlap(sample_num, overlap_)
        # if VISUAL or score < threshold or (STVISUAL is not 0 and term % STVISUAL == 0):
        #     ground_truth = evl.get_ground_truth()
        #     vis_detection(im_path, ground_truth, gt)
        if term % 200 == 0:
            break
        else:
            im_path = evl.next_frame()

    total_timer.toc()
    print('Total time {} seconds for {} pictures.'.format(
        total_timer.diff, term))
    print('mAP: {}.'.format(evl.get_mAP()))
    record.add_record(sample_num=sample_num,
                      frame_num=term,
                      mAP=evl.get_mAP(),
                      total_time=total_timer.diff,
                      finetune_iter=finetune_iter_)
    record._save_json()