# Third-party imports used throughout this section; the repo-internal names
# (Timer, cfg, recognize_attr, locate, colors and the Resized* exceptions)
# are assumed to be imported near the top of the full file.
import cPickle
import math
import os

import cv2
import numpy as np


def test_net(net, db, output_dir):
    """Test a Weakly-supervised Pedestrian Attribute Localization Network
    on an image database."""
    num_images = len(db.test_ind)

    all_attrs = [[] for _ in xrange(num_images)]

    # timers
    _t = {'recognize_attr': Timer()}

    threshold = np.ones(db.num_attr) * 0.5

    cnt = 0
    for i in db.test_ind:
        img_path = db.get_img_path(i)
        img = cv2.imread(img_path)
        _t['recognize_attr'].tic()
        try:
            attr, _, score, _ = recognize_attr(net, img, db.attr_group,
                                               threshold)
            _t['recognize_attr'].toc()
            all_attrs[cnt] = attr
            cnt += 1
            if cnt % 100 == 0:
                print 'recognize_attr: {:d}/{:d} {:.3f}s' \
                    .format(cnt, num_images,
                            _t['recognize_attr'].average_time)
        except:
            # Images that fail recognition are silently skipped; their slots
            # at the tail of all_attrs remain empty lists.
            pass

    attr_file = os.path.join(output_dir, 'attributes.pkl')
    with open(attr_file, 'wb') as f:
        cPickle.dump(all_attrs, f, cPickle.HIGHEST_PROTOCOL)

    mA, accPerAttr, challenging = db.evaluate_mA(all_attrs, db.test_ind)
    print 'mA={:f}'.format(mA)
    print 'Challenging attributes:', challenging

    acc, prec, rec, f1 = db.evaluate_example_based(all_attrs, db.test_ind)
    print 'Acc={:f} Prec={:f} Rec={:f} F1={:f}'.format(acc, prec, rec, f1)

    acc_file = os.path.join(output_dir, 'acc.txt')
    with open(acc_file, 'w') as f:
        for i in xrange(min(db.num_attr, cfg.TEST.MAX_NUM_ATTR)):
            f.write('{}: {}\n'.format(db.attr_eng[i][0][0], accPerAttr[i]))
        f.write('mA: {}\n'.format(mA))
        f.write('Acc: {} \t Prec: {} \t Rec: {} \t F1: {}\n'.format(
            acc, prec, rec, f1))
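
# For reference, test_net leaves two artifacts in output_dir: attributes.pkl
# (a cPickle dump of the per-image attribute vectors) and acc.txt (a plain-
# text summary). A minimal sketch of reading the pickle back -- the file name
# comes from the code above, everything else is illustrative:
#
#   with open(os.path.join(output_dir, 'attributes.pkl'), 'rb') as f:
#       all_attrs = cPickle.load(f)
#
# acc.txt holds one "<attribute name>: <per-attribute accuracy>" line per
# attribute, then an "mA: ..." line and a combined Acc/Prec/Rec/F1 line.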
def locate_in_video(net, db, video_path, tracking_res_path, output_dir,
                    pos_ave, neg_ave, dweight, attr_id_list):
    """Locate attributes of pedestrians in a video using a WPAL-network.

    The tracking results should be provided in a text file.
    """
    cfg.TEST.MAX_AREA = cfg.TEST.MAX_AREA * 3 / 4

    attr_ids = [int(s) for s in attr_id_list.split(',')]
    if len(attr_ids) > len(colors):
        print 'Cannot locate more than {} attributes in one video!'.format(
            len(colors))
        return

    name_comb = db.attr_eng[attr_ids[0]][0][0]
    for attr_id in attr_ids[1:]:
        name_comb += db.attr_eng[attr_id][0][0]
    vid_path = os.path.join(output_dir, 'display', name_comb,
                            os.path.basename(video_path))
    if not os.path.exists(vid_path):
        os.makedirs(vid_path)

    # Read tracks.
    with open(tracking_res_path) as f:
        num_tracklets = int(f.readline())
        tracklets = []
        for i in xrange(num_tracklets):
            f.readline()  # skip the per-tracklet header line
            tracklet = {'start_frame_ind': int(f.readline())}
            num_bbox = int(f.readline())
            bbox_seq = []
            for j in xrange(num_bbox):
                line = f.readline()
                # The boxes are consumed as [x, y, w, h] by the cropping
                # code below.
                x, y, w, h = line.split()
                bbox_seq.append([int(x), int(y), int(w), int(h)])
            tracklet['bbox_seq'] = bbox_seq
            tracklets.append(tracklet)

    threshold = np.ones(db.num_attr) * 0.5

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.cv.CV_CAP_PROP_FPS)
    writer = None
    frame_cnt = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        canvas = np.array(frame)

        # Draw the legend: one color swatch and attribute name per requested
        # attribute.
        for i in xrange(len(attr_ids)):
            cv2.rectangle(canvas,
                          (frame.shape[1] - 300, 30 + 60 * i),
                          (frame.shape[1] - 280, 50 + 60 * i),
                          colors[i], thickness=20)
            cv2.putText(canvas, db.attr_eng[attr_ids[i]][0][0],
                        (frame.shape[1] - 260, 50 + 60 * i),
                        cv2.FONT_HERSHEY_COMPLEX, 1, colors[i], thickness=3)

        has_pedestrian = False
        for tracklet in tracklets:
            if tracklet['start_frame_ind'] \
                    <= frame_cnt \
                    < tracklet['start_frame_ind'] + len(tracklet['bbox_seq']):
                has_pedestrian = True
                bbox_seq = tracklet['bbox_seq']
                bbox = bbox_seq[frame_cnt - tracklet['start_frame_ind']]
                cropped = frame[bbox[1]:bbox[1] + bbox[3],
                                bbox[0]:bbox[0] + bbox[2]]

                # Pass the cropped pedestrian through the test net.
                try:
                    attr, heat_maps, score, img_scale = recognize_attr(
                        net, cropped, db.attr_group, threshold, neglect=False)
                except ResizedSideTooShortException:
                    print 'Skipped for too short side.'
                    continue

                msg = ''
                for i in xrange(len(attr_ids)):
                    if attr[attr_ids[i]] == 1:
                        msg += db.attr_eng[attr_ids[i]][0][0] + ' '
                print 'Recognized {}from Frame {}'.format(msg, frame_cnt)
                msg = ''
                for i in xrange(len(attr)):
                    if attr[i] == 1 and i not in attr_ids:
                        msg += db.attr_eng[i][0][0] + ' '
                print 'Unshown attributes: ' + msg

                cv2.imshow("cropped", cropped)
                cv2.waitKey(1)

                cropped_height = int(cropped.shape[0] * img_scale)
                cropped_width = int(cropped.shape[1] * img_scale)
                cropped = cv2.resize(cropped, (cropped_width, cropped_height))

                for i in xrange(len(attr_ids)):
                    attr_id = attr_ids[i]
                    if attr[attr_id] != 1:
                        continue
                    act_map, centroids = locate(cropped, pos_ave, neg_ave,
                                                dweight, attr_id, db, attr,
                                                heat_maps, score,
                                                display=False)
                    act_map = cv2.resize(act_map, (bbox[2], bbox[3]))
                    # Blend the activation map into the frame in this
                    # attribute's color, clamped to the uint8 range.
                    for x in xrange(bbox[2]):
                        for y in xrange(bbox[3]):
                            fx = x + bbox[0]
                            fy = y + bbox[1]
                            for ch in xrange(3):
                                canvas[fy][fx][ch] = np.uint8(min(
                                    255,
                                    canvas[fy][fx][ch]
                                    + max(0, act_map[y][x]) * colors[i][ch]))
                    # Map centroids back to frame coordinates and mark them
                    # with crosses (later centroids get thinner strokes).
                    centroids = centroids[:, :2] / img_scale \
                        + (bbox[0], bbox[1])
                    cross_len = math.sqrt(
                        frame.shape[0] * frame.shape[1]) * 0.02
                    thickness = len(centroids) * 2
                    for c in centroids:
                        cv2.line(canvas,
                                 (int(c[0] - cross_len), int(c[1])),
                                 (int(c[0] + cross_len), int(c[1])),
                                 colors[i], thickness=thickness)
                        cv2.line(canvas,
                                 (int(c[0]), int(c[1] - cross_len)),
                                 (int(c[0]), int(c[1] + cross_len)),
                                 colors[i], thickness=thickness)
                        thickness -= 2

        if has_pedestrian:
            if writer is None:
                writer = cv2.VideoWriter(
                    os.path.join(vid_path, str(frame_cnt) + '.avi'),
                    fourcc=cv2.cv.FOURCC('M', 'J', 'P', 'G'),
                    fps=fps / 2,
                    frameSize=(frame.shape[1], frame.shape[0]),
                    isColor=True)
            cv2.imshow("Vis", canvas)
            cv2.waitKey(1)
            writer.write(canvas)
        elif writer is not None:
            writer.release()  # finalize the current video segment
            writer = None
            cv2.destroyWindow("Vis")

        frame_cnt += 1
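
# The tracking-results parser above implies the following plain-text layout
# (a sketch reconstructed from the reads in locate_in_video; the per-tracklet
# header line is read and discarded, and the third and fourth box fields are
# consumed as width and height by the cropping code):
#
#   2                 <- number of tracklets
#   tracklet 0        <- header line (ignored)
#   15                <- start frame index
#   2                 <- number of boxes
#   10 20 50 100      <- one box per line: x y w h
#   12 21 50 100
#   tracklet 1
#   ...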
def estimate_param(net, db, output_dir, res_file, save_res=False):
    attrs = []
    scores = []
    labels = []
    if res_file is None:
        cnt = 0
        for i in db.train_ind:
            img = cv2.imread(db.get_img_path(i))
            attr, _, score, _ = recognize_attr(net, img, db.attr_group)
            attrs.append(attr)
            scores.append([x for x in score])
            labels.append(db.labels[i])
            cnt += 1
            if cnt % 1000 == 0:
                print 'Tested: {}/{}'.format(cnt, len(db.train_ind))
        if save_res:
            print 'Saving results...'
            val_file = os.path.join(output_dir, 'val.pkl')
            with open(val_file, 'wb') as f:
                cPickle.dump({'attrs': attrs, 'scores': scores},
                             f, cPickle.HIGHEST_PROTOCOL)
            print 'Results stored to {}!'.format(val_file)
    else:
        print 'Loading stored results from {}.'.format(res_file)
        pack = cPickle.load(open(res_file, 'rb'))
        attrs = pack['attrs']
        scores = pack['scores']
        labels = db.labels[db.train_ind]
        print 'Stored results loaded!'

    # Binding between attributes and detectors (or detector bins).
    pos_ave = np.zeros((db.num_attr, len(scores[0])))
    neg_ave = np.zeros((db.num_attr, len(scores[0])))

    # Estimate detector binding: average each detector's scores separately
    # over positive and negative samples of every attribute.
    for i in xrange(db.num_attr):
        pos_ind = np.where(np.array([x[i] for x in labels]) > 0.5)[0]
        neg_ind = np.where(np.array([x[i] for x in labels]) < 0.5)[0]
        print 'For attr {}: pos={}; neg={}'.format(i, len(pos_ind),
                                                   len(neg_ind))
        pos_sum = np.zeros(len(scores[0]), dtype=float)
        neg_sum = np.zeros(len(scores[0]), dtype=float)
        for j in pos_ind:
            pos_sum += np.array(scores[j])
        for j in neg_ind:
            neg_sum += np.array(scores[j])
        pos_ave[i] = pos_sum / len(pos_ind)
        neg_ave[i] = neg_sum / len(neg_ind)
        print 'Estimated attr {}/{}'.format(i, db.num_attr)

    binding = np.exp(pos_ave / neg_ave)

    detector_file = os.path.join(output_dir, 'detector.pkl')
    with open(detector_file, 'wb') as f:
        cPickle.dump({'pos_ave': pos_ave,
                      'neg_ave': neg_ave,
                      'binding': binding},
                     f, cPickle.HIGHEST_PROTOCOL)

    return binding, pos_ave, neg_ave
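
# Illustrative sketch (not part of the pipeline): the binding estimate above
# reduces to comparing each detector's mean score on positive vs. negative
# samples of an attribute and taking exp() of the ratio. A toy example with
# hypothetical numbers -- 4 samples, 1 attribute, 3 detectors:
def _demo_binding():
    scores = np.array([[0.9, 0.1, 0.5],   # positive sample
                       [0.8, 0.2, 0.4],   # positive sample
                       [0.1, 0.3, 0.5],   # negative sample
                       [0.2, 0.2, 0.6]])  # negative sample
    labels = np.array([1, 1, 0, 0])
    pos_ave = scores[labels > 0.5].mean(axis=0)  # [0.85, 0.15, 0.45]
    neg_ave = scores[labels < 0.5].mean(axis=0)  # [0.15, 0.25, 0.55]
    binding = np.exp(pos_ave / neg_ave)
    # Detector 0 scores much higher on positives, so its binding is largest.
    print binding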
def test_localization(net, db, output_dir, pos_ave, neg_ave, dweight,
                      attr_id=-1, display=True, max_count=-1):
    """Test localization of a WPAL Network."""
    iou_all = []
    overlaprate_all = []
    syn_inf = []
    # Kept for the commented-out evaluation paths below.
    used_img_ind = []
    used_img_label = []
    used_img_pred = []
    for i in range(0, 51):
        iou_all.append([])
        overlaprate_all.append([])

    cfg.TEST.MAX_AREA = cfg.TEST.MAX_AREA * 7 / 8

    num_images = len(db.test_ind)
    if max_count == -1:
        max_count = num_images

    threshold = np.ones(db.num_attr) * 0.5

    if attr_id == -1:
        # Locate the whole body outline.
        attr_list = xrange(db.num_attr)
    else:
        # Locate only one attribute.
        attr_list = [attr_id]

    cnt = 0
    for img_ind in db.test_ind:
        img_path = db.get_img_path(img_ind)
        name = os.path.split(img_path)[1]

        # Prepare the image.
        img = cv2.imread(img_path)
        print img.shape[0], img.shape[1]

        # Pass the image through the test net.
        try:
            attr, heat_maps, score, img_scale = recognize_attr(
                net, img, db.attr_group, threshold, neglect=False)
        except ResizedImageTooLargeException:
            print 'Skipped for too large resized image.'
            continue
        except ResizedSideTooShortException:
            print 'Skipped for too short side.'
            continue

        # if attr_id != -1 and attr[attr_id] != 1:
        #     print 'Image {} skipped: attribute {} was not recognized!' \
        #         .format(name, db.attr_eng[attr_id][0][0])
        #     continue

        img_height = int(img.shape[0] * img_scale)
        img_width = int(img.shape[1] * img_scale)
        img = cv2.resize(img, (img_width, img_height))
        if display:
            cv2.imshow("img", img)

        if attr_id == -1:
            total_superposition = np.zeros(img.shape[0:2], dtype=float)
            all_centroids = []
        for a in attr_list:
            # Check directory for saving visualization images.
            vis_img_dir = os.path.join(output_dir, 'display',
                                       db.attr_eng[a][0][0], name)
            if not os.path.exists(vis_img_dir):
                os.makedirs(vis_img_dir)

            # Ground-truth part box for this attribute, shifted to the
            # person box and rescaled. NOTE: this indexes with attr_id
            # (not a), so this code path assumes attr_id != -1.
            low = 4 * int(db.attr_position_ind[attr_id])
            up = low + 4
            bbxx, bbxy, bbxw, bbxh = db.position[int(img_ind)][0:4]
            xa1, ya1, pw, ph = db.position[int(img_ind)][low:up]
            xa1 = int((xa1 - bbxx) * img_scale)
            ya1 = int((ya1 - bbxy) * img_scale)
            pw = int(pw * img_scale)
            ph = int(ph * img_scale)

            # Per-attribute adjustments of the ground-truth box.
            if a == 9:
                ph /= 2
            if a == 12:
                ph = ph * 3 / 4
            if a == 13:
                ya1 += ph / 3
                ph /= 3
            if a == 14:
                ya1 += ph / 2
                ph /= 2
            if 15 <= a <= 23:
                ph = ph * 4 / 5
            if 30 <= a <= 34:
                ya1 += 3 * ph / 4
                ph /= 4

            act_map, centroids, overlaprate_single, iou_single, pos_loc_img \
                = locate(xa1, ya1, pw, ph, img_ind, img, pos_ave, neg_ave,
                         dweight, a, db, attr, heat_maps, score,
                         False,  # display is disabled in this variant
                         vis_img_dir)

            if pos_loc_img == 1:
                if attr_id != -1 and (db.labels[img_ind][attr_id] == 0
                                      or db.labels[img_ind][attr_id] == 1):
                    # used_img_ind.append(img_ind)
                    # used_img_pred.append(attr[attr_id])
                    if attr_id != -1 and db.labels[img_ind][attr_id] == 0:
                        print 'Image {} is a negative sample for attribute {}!' \
                            .format(name, db.attr_eng[attr_id][0][0])
                        a_i_label = 0
                        # used_img_label.append(0)
                    else:
                        a_i_label = 1
                        # used_img_label.append(1)
                    print "Adding syn"
                    syn_inf.append([attr[attr_id], a_i_label,
                                    overlaprate_single, img_ind])
                # iou_all[a].append(iou_single)
                # overlaprate_all[a].append(overlaprate_single)

            if attr_id == -1:
                all_centroids += centroids
                total_superposition += act_map * 256 / len(attr_list)
            print 'Localized attribute {}: {}!'.format(a,
                                                       db.attr_eng[a][0][0])

        if attr_id == -1:
            img_area = img_height * img_width
            cross_len = math.sqrt(img_area) * 0.05
            canvas = np.array(img)
            for j in xrange(img_height):
                for k in xrange(img_width):
                    canvas[j][k][2] = min(
                        255,
                        max(0, canvas[j][k][2]
                            + max(0, total_superposition[j][k])))
                    canvas[j][k][1] = min(255, max(0, canvas[j][k][1]))
                    canvas[j][k][0] = min(255, max(0, canvas[j][k][0]))
            canvas = canvas.astype('uint8')
            for c in all_centroids:
                cv2.line(canvas,
                         (int(c[0] - cross_len), int(c[1])),
                         (int(c[0] + cross_len), int(c[1])),
                         (0, 255, 255), thickness=4)
                cv2.line(canvas,
                         (int(c[0]), int(c[1] - cross_len)),
                         (int(c[0]), int(c[1] + cross_len)),
                         (0, 255, 255), thickness=4)
            vis_img_dir = os.path.join(output_dir, 'display', 'body', name)
            if not os.path.exists(vis_img_dir):
                os.makedirs(vis_img_dir)
            if display:
                cv2.imshow("img", canvas)
                cv2.waitKey(0)
                cv2.destroyWindow("img")
            print 'Saving to:', os.path.join(vis_img_dir, 'final.jpg')
            cv2.imwrite(os.path.join(vis_img_dir, 'final.jpg'), canvas)

        cnt += 1
        print 'Localized {} targets!'.format(cnt)
        if cnt >= max_count:
            break

    if attr_id != -1:
        if len(syn_inf) != 0:
            return syn_inf
            # return overlaprate_all[attr_id], iou_all[attr_id], \
            #     used_img_ind, used_img_label, used_img_pred
            # overlaprate_all_attr_sum = 0.0
            # iou_single_attr_sum = 0.0
            # for x in iou_all[attr_id]:
            #     iou_single_attr_sum += x
            # for y in overlaprate_all[attr_id]:
            #     overlaprate_all_attr_sum += y
            # iou_single_attr_sum /= len(iou_all[attr_id])
            # overlaprate_all_attr_sum /= len(overlaprate_all[attr_id])
            # return overlaprate_all_attr_sum, iou_single_attr_sum
        else:
            return []
# NOTE: this second definition of test_localization shadows the variant above
# when both live in the same module.
def test_localization(net, db, output_dir, pos_ave, neg_ave, dweight,
                      attr_id=-1, display=True, max_count=-1):
    """Test localization of a WPAL Network."""
    cfg.TEST.MAX_AREA = cfg.TEST.MAX_AREA * 7 / 8

    num_images = len(db.test_ind)
    if max_count == -1:
        max_count = num_images

    threshold = np.ones(db.num_attr) * 0.5

    if attr_id == -1:
        # Locate the whole body outline.
        attr_list = xrange(db.num_attr)
    else:
        # Locate only one attribute.
        attr_list = [attr_id]

    cnt = 0
    for img_ind in db.test_ind:
        img_path = db.get_img_path(img_ind)
        name = os.path.split(img_path)[1]
        if attr_id != -1 and db.labels[img_ind][attr_id] == 0:
            print 'Image {} skipped: it is a negative sample for attribute {}!' \
                .format(name, db.attr_eng[attr_id][0][0])
            continue

        # Prepare the image.
        img = cv2.imread(img_path)
        print img.shape[0], img.shape[1]

        # Pass the image through the test net.
        try:
            attr, heat_maps, score, img_scale = recognize_attr(
                net, img, db.attr_group, threshold, neglect=False)
        except ResizedImageTooLargeException:
            print 'Skipped for too large resized image.'
            continue
        except ResizedSideTooShortException:
            print 'Skipped for too short side.'
            continue

        if attr_id != -1 and attr[attr_id] != 1:
            print 'Image {} skipped: attribute {} was not recognized from it!' \
                .format(name, db.attr_eng[attr_id][0][0])
            continue

        img_height = int(img.shape[0] * img_scale)
        img_width = int(img.shape[1] * img_scale)
        img = cv2.resize(img, (img_width, img_height))
        if display:
            cv2.imshow("img", img)

        if attr_id == -1:
            total_superposition = np.zeros(img.shape[0:2], dtype=float)
            all_centroids = []
        for a in attr_list:
            # Check directory for saving visualization images.
            vis_img_dir = os.path.join(output_dir, 'display',
                                       db.attr_eng[a][0][0], name)
            if not os.path.exists(vis_img_dir):
                os.makedirs(vis_img_dir)

            act_map, centroids = locate(img, pos_ave, neg_ave, dweight, a,
                                        db, attr, heat_maps, score,
                                        display and attr_id != -1,
                                        vis_img_dir)
            if attr_id == -1:
                all_centroids += centroids
                total_superposition += act_map * 256 / len(attr_list)
            print 'Localized attribute {}: {}!'.format(a,
                                                       db.attr_eng[a][0][0])

        if attr_id == -1:
            img_area = img_height * img_width
            cross_len = math.sqrt(img_area) * 0.05
            canvas = np.array(img)
            for j in xrange(img_height):
                for k in xrange(img_width):
                    canvas[j][k][2] = min(
                        255,
                        max(0, canvas[j][k][2]
                            + max(0, total_superposition[j][k])))
                    canvas[j][k][1] = min(255, max(0, canvas[j][k][1]))
                    canvas[j][k][0] = min(255, max(0, canvas[j][k][0]))
            canvas = canvas.astype('uint8')
            for c in all_centroids:
                cv2.line(canvas,
                         (int(c[0] - cross_len), int(c[1])),
                         (int(c[0] + cross_len), int(c[1])),
                         (0, 255, 255), thickness=4)
                cv2.line(canvas,
                         (int(c[0]), int(c[1] - cross_len)),
                         (int(c[0]), int(c[1] + cross_len)),
                         (0, 255, 255), thickness=4)
            vis_img_dir = os.path.join(output_dir, 'display', 'body', name)
            if not os.path.exists(vis_img_dir):
                os.makedirs(vis_img_dir)
            if display:
                cv2.imshow("img", canvas)
                cv2.waitKey(0)
                cv2.destroyWindow("img")
            print 'Saving to:', os.path.join(vis_img_dir, 'final.jpg')
            cv2.imwrite(os.path.join(vis_img_dir, 'final.jpg'), canvas)

        cnt += 1
        print 'Localized {} targets!'.format(cnt)
        if cnt >= max_count:
            break
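
# A hedged end-to-end sketch of how these functions fit together, assuming a
# caffe-style `net` and an attribute database `db` have already been
# constructed elsewhere in the repo (their loaders are not shown here):
#
#   binding, pos_ave, neg_ave = estimate_param(net, db, output_dir,
#                                              res_file=None, save_res=True)
#   test_net(net, db, output_dir)
#   test_localization(net, db, output_dir, pos_ave, neg_ave,
#                     dweight=binding, attr_id=0, display=False)
#
# The dweight argument is passed straight through to locate(); feeding it the
# estimated binding matrix is an assumption based on the matching shapes, not
# something this file itself asserts.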