def main(im, bbox):
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)

    # PIL gives (width, height); store frame_sz as (height, width)
    with Image.fromarray(im) as img:
        frame_sz = np.asarray(img.size)
        frame_sz[1], frame_sz[0] = frame_sz[0], frame_sz[1]

    im = Image.fromarray(im)

    torch.save(siam.state_dict(), '/home/nvidia/jlaplaza/siamfc_pytorch_test/siamfc_pretrained.pt')

    # gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(bbox)
    print(target_w, target_h)

    # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
    #                         final_score_sz, filename, image, templates_z, scores, evaluation.start_frame)
    tracker(hp, run, design, im, pos_x, pos_y, target_w, target_h, final_score_sz,
            siam, evaluation.start_frame)
def evaluate():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    frame_sz = [i for i in cv2.imread(frame_name_list[0]).shape]
    siamNet = siam.Siamese(batch_size=1)
    image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary, \
        templates_x, max_pos_x, max_pos_y = siamNet.build_tracking_graph_train(
            final_score_sz, design, env, hp, frame_sz)

    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
    bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w,
                            target_h, final_score_sz, image, templates_z, scores,
                            evaluation.start_frame,
                            path_ckpt=os.path.join(design.saver_folder, design.path_ckpt),
                            siamNet=siamNet)
    _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
    print(evaluation.video +
          ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
          ' -- Precision AUC: ' + "%.2f" % precision_auc +
          ' -- IOU: ' + "%.2f" % iou +
          ' -- Speed: ' + "%.2f" % speed + ' --')
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build the computational graph of the Siamese fully-convolutional network
    siamNet = siam.Siamese(design.batch_size)
    # get tensors that will be used during training
    image, z_crops, x_crops, templates_z, scores, loss, train_step, distance_to_gt, summary = \
        siamNet.build_tracking_graph_train(final_score_sz, design, env, hp)

    # read the tfrecord file holding all the training data
    data_reader = src.read_training_dataset.myReader(design.resize_width,
                                                     design.resize_height, design.channel)
    batched_data = data_reader.read_tfrecord(
        os.path.join(env.tfrecord_path, env.tfrecord_filename),
        num_epochs=design.num_epochs, batch_size=design.batch_size)

    # run trainer
    trainer(hp, run, design, final_score_sz, batched_data, image, templates_z, scores,
            loss, train_step, distance_to_gt, z_crops, x_crops, siamNet, summary)
def run_SiamFCpytorch(seq, rp, bSaveImage):
    hp, evaluation, run, env, design = parse_arguments()
    # final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    final_score_sz = 265
    siam = SiameseNet(env.root_pretrained, design.net)
    load_net(NET_PATH, siam)
    siam.cuda()

    frame_name_list = seq.s_frames
    init_rect = seq.init_rect
    x, y, width, height = init_rect  # OTB format
    init_bb = Rectangle(x - 1, y - 1, float(width), float(height))
    init_bb = convert_bbox_format(init_bb, 'center-based')

    bboxes, speed = tracker(hp, run, design, frame_name_list, init_bb.x, init_bb.y,
                            init_bb.width, init_bb.height, final_score_sz, siam,
                            evaluation.start_frame)
    trajectory = [Rectangle(val[0] + 1, val[1] + 1, val[2], val[3]) for val in bboxes]
    result = dict()
    result['res'] = trajectory
    result['type'] = 'rect'
    result['fps'] = speed
    return result
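
# Rectangle and convert_bbox_format come from the surrounding codebase and are not
# shown here; the following is a minimal sketch consistent with how they are used
# above (an assumption, not the original helpers).
from collections import namedtuple

Rectangle = namedtuple('Rectangle', ['x', 'y', 'width', 'height'])

def convert_bbox_format(bbox, to):
    x, y, w, h = bbox.x, bbox.y, bbox.width, bbox.height
    if to == 'center-based':
        # (x, y) is the top-left corner; move it to the box center
        return Rectangle(x + (w - 1) / 2, y + (h - 1) / 2, w, h)
    elif to == 'top-left-based':
        return Rectangle(x - (w - 1) / 2, y - (h - 1) / 2, w, h)
    raise ValueError("Unknown bbox format: {}".format(to))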
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    # filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)
    siamNet = siam.Siamese(design.batch_size)
    image, z_crops, x_crops, templates_z, scores, loss, train_step, distance_to_gt, summary, \
        tz, max_pos_x, max_pos_y = siamNet.build_tracking_graph_train(
            final_score_sz, design, env, hp)

    batched_data = read_tfrecord(os.path.join(env.tfrecord_path, env.tfrecord_filename),
                                 num_epochs=design.num_epochs, batch_size=design.batch_size)

    trainer(hp, run, design, final_score_sz, image, templates_z, scores, loss, train_step,
            distance_to_gt, batched_data, z_crops, x_crops, siamNet, summary, tz,
            max_pos_x, max_pos_y)
def __init__(self):
    super(SiamMCF, self).__init__("SiamMCF")
    root_dir = path_config.SIAMMCF_ROOT_DIR
    self.hp, self.evaluation, self.env, self.design = parse_arguments(root_dir)
    self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1) + 1
    # build TF graph once for all
    (
        self.filename,
        self.image,
        self.templates_x,
        self.templates_z,
        self.scores_list,
    ) = siam.build_tracking_graph(
        root_dir, self.final_score_sz, self.design, self.env, self.hp
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    tf.global_variables_initializer().run(session=self.sess)

    # skip the post-normalization variables when restoring the checkpoint
    vars_to_load = []
    for v in tf.global_variables():
        if "postnorm" not in v.name:
            vars_to_load.append(v)

    siam_ckpt_name = path_config.SIAMMCF_MODEL
    siam_saver = tf.train.Saver(vars_to_load)
    siam_saver.restore(self.sess, siam_ckpt_name)
def __init__(self, imagefile, region):
    # params
    self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments()
    self.final_score_sz = 273

    # init network
    self.siam = SiameseNet(self.env.root_pretrained, self.design.net)
    NET_PATH = '/home/lee/tracking/challenge/vot-toolkit/tracker/examples/python/pretrained/000100vggv1net1-5.weights'
    load_net(NET_PATH, self.siam)
    self.siam.cuda()

    # init bbox
    bbox = convert_bbox_format(region, 'center-based')
    self.pos_x, self.pos_y, self.target_w, self.target_h = bbox.x, bbox.y, bbox.width, bbox.height

    # init scale factors and displacement penalty
    self.scale_factors = self.hp.scale_step ** np.linspace(
        -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2), self.hp.scale_num)
    hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
    self.penalty = np.transpose(hann_1d) * hann_1d
    self.penalty = self.penalty / np.sum(self.penalty)

    # search and exemplar patch sizes
    context = self.design.context * (self.target_w + self.target_h)
    self.z_sz = np.sqrt(np.prod((self.target_w + context) * (self.target_h + context)))
    self.x_sz = float(self.design.search_sz) / self.design.exemplar_sz * self.z_sz

    image_, self.templates_z_ = self.siam.get_template_z_new(
        self.pos_x, self.pos_y, self.z_sz, imagefile, self.design)
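
# A quick numeric check of the scale pyramid built above (values assume the common
# SiamFC defaults scale_step = 1.0375, scale_num = 3 and Python 3 true division):
import numpy as np

scale_step, scale_num = 1.0375, 3
exponents = np.linspace(-np.ceil(scale_num / 2), np.ceil(scale_num / 2), scale_num)
print(exponents)                # [-2.  0.  2.]
print(scale_step ** exponents)  # approx [0.9290, 1.0, 1.0764]
# Under Python 2 integer division (scale_num / 2 == 1) the exponents would be [-1, 0, 1].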
def __init__(self):
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    self.image_input = tf.placeholder(tf.float32, name='img_in', shape=(360, 640, 3))
    self.pos_x_ph = tf.placeholder(tf.float64, name='pos_x_ph', shape=(1,))
    self.pos_y_ph = tf.placeholder(tf.float64, name='pos_y_ph', shape=(1,))
    # size of the target exemplar
    self.z_sz_ph = tf.placeholder(tf.float64, name='z_sz_ph', shape=(1,))
    # the search input rescaled by three different factors: searching over several
    # scales keeps the target in the search crop close in size to the exemplar,
    # even when the target's scale changes
    self.x_sz0_ph = tf.placeholder(tf.float64, name='x_sz0_ph', shape=(1,))
    self.x_sz1_ph = tf.placeholder(tf.float64, name='x_sz1_ph', shape=(1,))
    self.x_sz2_ph = tf.placeholder(tf.float64, name='x_sz2_ph', shape=(1,))

    self.template_x, self.templates_z, self.scores, \
        self.crop_x, self.crop_z, \
        self.padded_x, self.padded_z = _build_tracking_graph(
            self.image_input, final_score_sz, design, env,
            self.pos_x_ph, self.pos_y_ph, self.z_sz_ph,
            self.x_sz0_ph, self.x_sz1_ph, self.x_sz2_ph)

    self.scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                      np.ceil(hp.scale_num / 2),
                                                      hp.scale_num)
    self.scale_factors = np.expand_dims(self.scale_factors, axis=-1)
    self.final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    self.template_data = None

    # region format: center coordinates plus target width and height
    self.last_pos_x = None
    self.last_pos_y = None
    self.target_w = 160.
    self.target_h = 160.

    # cosine window to penalize large displacements
    self.hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(self.hann_1d) * self.hann_1d
    self.penalty = penalty / np.sum(penalty)

    self.context = design.context * (self.target_w + self.target_h)
    self.z_sz = np.sqrt(np.prod((self.target_w + self.context) * (self.target_h + self.context)))
    self.x_sz = float(design.search_sz) / design.exemplar_sz * self.z_sz
    self.hp = hp
    self.design = design
def ColdInit(self, imagepath, region):
    # Parse the arguments
    self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(mode='siamese')

    # Get first-frame image and ground truth
    self.region = region
    self.pos_x = region.x + region.width / 2
    self.pos_y = region.y + region.height / 2
    self.target_w = region.width
    self.target_h = region.height
    self.bbox = (self.pos_x - self.target_w / 2, self.pos_y - self.target_h / 2,
                 self.target_w, self.target_h)

    # Size of the final score map (upscaled score matrix, where the score matrix is
    # the cross-correlation of the outputs of the two Siamese branches)
    self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1) + 1

    # Initialize the COLOR network and load the weights
    self.color_params = self.InitColorNetwork()
    # Initialize the SIAMESE network and load the weights
    self.siam_params = self.InitSiamNetwork()

    # Calculate the scale factors
    self.scale_factors = self.hp.scale_step ** np.linspace(
        -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2), self.hp.scale_num)

    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    self.penalty = penalty / np.sum(penalty)

    # Calculate search and target patch sizes
    context = self.design.context * (self.target_w + self.target_h)
    self.z_sz = np.sqrt(np.prod((self.target_w + context) * (self.target_h + context)))
    self.x_sz = float(self.design.search_sz) / self.design.exemplar_sz * self.z_sz

    # Load the colorization model
    self.LoadColorModel()

    # Extract the Siamese template
    image_, templates_z_ = self.ExtractSiamTemplate(imagepath)
    self.siam_ret = {"image_": image_, "templates_z_": templates_z_}

    # Extract the color template
    templates_z_, z_crops_ = self.ExtractColorTemplate(imagepath)
    self.color_ret = {"templates_z_": templates_z_, "z_crops_": z_crops_}
    return
def main():
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # --- Start streaming from video ---
    cap = cv2.VideoCapture(env.root_sequences + '/' + sys.argv[1] + '.mp4')
    ret, frame = cap.read()
    if not ret:
        print("Error opening video sequence")

    # --- Save video (optional) ---
    if run.save_video:
        vid_write = cv2.VideoWriter(env.root_sequences + '/' + sys.argv[1] + '_out.avi',
                                    cv2.VideoWriter_fourcc(*'MJPG'), 25,
                                    (frame.shape[1], frame.shape[0]), True)

    # --- Define initial bounding box ---
    BB = click_and_crop(frame, design.window_name)
    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)
    cv2.imshow(design.window_name, frame)
    cv2.waitKey(0)
    while True:
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # ----- Define initial bounding box params & template -----
    pos_x = int((BB.refPt[0][0] + BB.refPt[1][0]) / 2)    # template center x
    pos_y = int((BB.refPt[0][1] + BB.refPt[1][1]) / 2)    # template center y
    target_w = int(abs(BB.refPt[1][0] - BB.refPt[0][0]))  # template width
    target_h = int(abs(BB.refPt[1][1] - BB.refPt[0][1]))  # template height

    # ----- Begin tracking -----
    tracker(hp, run, design, pos_x, pos_y, target_w, target_h, final_score_sz,
            templates_z, scores, cap, vid_write, frame)

    cap.release()
    cv2.destroyAllWindows()
    if run.save_video:
        vid_write.release()
def main():
    # avoid printing TF debugging information (only show error log)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    hp, evaluation, run, env, design = parse_arguments()

    # build TF graph in siamese once for all
    # siam.init_create_net()
    filename, siam_net_z, loss, train_op = siam.make_siameseFC(env, design, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        # path of the folder holding all videos
        train_data_folder = os.path.join(env.root_train_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(train_data_folder)]
        videos_list.sort()
        num_v = len(videos_list)
        for i in range(num_v):
            gt, frame_name_list, frame_sz, n_frames = _init_train_video(
                env, evaluation, videos_list[i])
            start_frame = evaluation.start_frame
            # not sure
            # gt_ = gt[start_frame:, :]
            gt_ = gt[start_frame:]
            frame_name_list_ = frame_name_list[start_frame:]
            num_frames = np.size(frame_name_list_)
            for j in range(num_frames - 1):
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[j])
                # train_siam_net(design, hp, frame_name_list, z_index, pos_x, pos_y,
                #                target_w, target_h, filename, siam_net_z, loss)
                train_siam_net(design, hp, frame_name_list, j, pos_x, pos_y, target_w,
                               target_h, filename, siam_net_z, loss, train_op)
    else:
        gt, frame_name_list, _, _ = _init_train_video(env, evaluation, evaluation.video)
        start_frame = evaluation.start_frame
        gt_ = gt[start_frame:]
        frame_name_list_ = frame_name_list[start_frame:]
        num_frames = np.size(frame_name_list_)
        train_siam_net(design, hp, frame_name_list, num_frames, gt, filename,
                       siam_net_z, loss, train_op)
def main(process, queue, box, video):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # read video stream over the radio link
    # width = 640
    # height = 480
    # process1 = (
    #     ffmpeg
    #     .input('tcp://192.168.1.155:8300', vcodec='h264', r=24, probesize=32,
    #            fflags="nobuffer", flags="low_delay", analyzeduration=1)
    #     .output('pipe:', format='rawvideo', pix_fmt="rgb24")
    #     .run_async(pipe_stdout=True)
    # )
    ## model
    # model_path = './frozen_inference_graph.pb'
    # odapi = DetectorAPI(path_to_ckpt=model_path)
    # while True:
    #     in_bytes = process1.stdout.read(width * height * 3)
    #     if not in_bytes:
    #         print("none")
    #     video = np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3])
    #     video = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)

    # read target from mat
    # box = odapi.processFrame(video)

    # convert corner-based [x1, y1, x2, y2] to center-based [cx, cy, w, h]
    box[2] -= box[0]
    box[3] -= box[1]
    box[0] += box[2] / 2
    box[1] += box[3] / 2
    print('box', box)
    pos_x, pos_y, target_w, target_h = box[0], box[1], box[2], box[3]

    tracker(hp, run, design, video, pos_x, pos_y, target_w, target_h, final_score_sz,
            image, templates_z, scores, process, queue)
    print('done')
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_2(final_score_sz, design, env)

    # read the ground truth of the evaluation video to initialize the target
    gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

    track_cam(hp, run, design, final_score_sz, image, templates_z, scores,
              evaluation.start_frame)
def main():
    _, _, _, env, design = parse_arguments()
    gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.3)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

    cam = cv2.VideoCapture('/home/yoonyoungcho/ext/frame%04d.jpg')
    # skip the first few frames so the sequence starts with a stable image
    for i in range(10):
        ret, image_ = cam.read()

    bbox_ = init_bbox(image_)
    tracker = SiamFCTracker(env, design)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        tracker.initialize(sess, image_, bbox_)
        while True:
            ret, image_ = cam.read()
            if not ret:
                break
            start = time.time()
            bbox_ = tracker.update(sess, image_)
            fps = 1.0 / (time.time() - start)
            if bbox_ is None:
                # tracking failure: ask for a new bounding box and re-initialize
                bbox_ = init_bbox(image_)
                tracker.initialize(sess, image_, bbox_)
            else:
                x, y, w, h = map(int, bbox_)
                cv2.rectangle(image_, (x, y), (x + w, y + h), (255, 0, 0), 2)
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(image_, 'fps:%d' % fps, (0, 20), font, 0.5, (0, 0, 255), 1)
            cv2.imshow('image', image_)
            if cv2.waitKey(10) == 27:
                break
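
# init_bbox is not shown in this file; a minimal stand-in using OpenCV's built-in
# ROI selector (an assumption about its behavior, not the original helper):
import cv2

def init_bbox(image):
    # opens a window; drag a rectangle and press ENTER/SPACE to confirm
    x, y, w, h = cv2.selectROI('select', image, showCrosshair=False, fromCenter=False)
    cv2.destroyWindow('select')
    return (x, y, w, h)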
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    # The score map is smaller than the input image, so it is scaled up here so that
    # every position in the image gets a score; this final size equals design.search_sz.
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            if os.path.exists(os.path.join('data/result', videos_list[i])):
                continue
            gt, frame_name_list, frame_sz, n_frames, video_folder, equal = _init_video(
                env, evaluation, videos_list[i])
            if not equal:
                print('The number of .jpg and .xml files differs in', video_folder)
                continue
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            # for j in range(evaluation.n_subseq):
            for j in range(1):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:]
                frame_name_list_ = frame_name_list[start_frame:]
                # pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                pos_x, pos_y, target_w, target_h = xml_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, filename,
                                             image, templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()
                # Draw_Result(bboxes, frame_name_list_, gt_, env)

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print()
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()
    else:
        gt, frame_name_list, _, _, video_folder, equal = _init_video(env, evaluation, evaluation.video)
        if not equal:
            print("The number of .jpg and .xml files differs in", video_folder)
            exit(0)
        pos_x, pos_y, target_w, target_h = xml_to_bbox(gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image, templates_z,
                                scores, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
def runSiamfc(folderPath, fourcc, testWidth, testHeight, graph, scfg):
    print('Running Siamfc: ' + folderPath)
    frame_name_list = _init_video(folderPath)
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    fp = open(os.path.join(folderPath, 'YoloBoxes.txt'))
    filename, image, templates_z, scores, graph1, scfg1 = siam.build_tracking_graph(
        final_score_sz, design, env)
    finalImages = []
    Allbboxes = []
    SiamfcVid = cv2.VideoWriter(join(folderPath, 'SiamfcVid.avi'), fourcc, 10,
                                (testWidth, testHeight))
    f = open(folderPath + "/SiamfcBoxes.txt", "w+")
    nucAngles = open(os.path.join(folderPath, "nuclearAngles.txt"), "r")
    print(refreshRate)
    for i in range(len(frame_name_list)):
        line = fp.readline()
        finalImages = []
        if line == '\n':
            continue
        elif i % refreshRate == 0:
            boxes = line[:-1].split(':')
            boxNr = 0
            for j in Allbboxes:
                # label = j.label
                # pos_x = j.positions[len(j.positions)-1][0]
                # pos_y = j.positions[len(j.positions)-1][1]
                # target_w = j.positions[len(j.positions)-1][2]
                # target_h = j.positions[len(j.positions)-1][3]
                # bboxes, speed, finalImages = tracker(graph1, scfg1, hp, run, design,
                #     frame_name_list[i:i+refreshRate-1], pos_x, pos_y, target_w, target_h,
                #     final_score_sz, filename, image, templates_z, scores, label, 0,
                #     colors[boxNr%len(colors)], 0, refreshRate-1, finalImages, 0)
                # j.positions = np.concatenate((j.positions, bboxes), 0)
                j.padafter(refreshRate)
            for j in boxes:
                box = j.split(',')
                label = box[0]
                box = list(map(int, box[1:]))  # list() so the box can be indexed below
                print('In folder %s Image %d has a box at %d,%d,%d,%d with label %s'
                      % (folderPath, i, box[0], box[1], box[2], box[3], label))
                pos_x = box[0]
                pos_y = box[1]
                target_w = box[2]
                target_h = box[3]
                print('Pos_x: %d, Pos_y:%d, width:%d, height:%d'
                      % (pos_x, pos_y, target_w, target_h))
                bboxes, speed, finalImages = tracker(
                    graph1, scfg1, hp, run, design,
                    frame_name_list[i:i + refreshRate - 1],
                    pos_x, pos_y, target_w, target_h, final_score_sz,
                    filename, image, templates_z, scores, label, 0,
                    colors[boxNr % len(colors)], 0, refreshRate - 1, finalImages, 0)
                newBox = sfc_bbox(colors[boxNr % len(colors)], label, bboxes, 0)
                newBox.padfront(i)
                Allbboxes.append(newBox)
                boxNr = boxNr + 1
                print(bboxes)
            fname = i
            probs = [0] * len(Allbboxes)
            # print(Allbboxes)
            if liveFeed:
                try:
                    oldFolderPath = folderPath.split('_')
                    oldFolderPath[2] = str(int(oldFolderPath[2]) - 1)
                    underscore = '_'
                    oldFolderPath = underscore.join(oldFolderPath)
                except:
                    print("Could not load old probabilities")
                    oldFolderPath = ""
            else:
                oldFolderPath = ""
            if os.path.isfile(oldFolderPath + "/SiamfcBoxes.txt"):
                print('Extracting old probs')
                probs = getOldProbs(oldFolderPath, Allbboxes,
                                    [(item.split(','))[0] for item in boxes])
                print(probs)
            for j in range(len(finalImages)):
                # print(probs)
                angle = int(nucAngles.readline())
                calcProbs(finalImages[j], angle, Allbboxes, i + j, f)
                cv2.circle(finalImages[j],
                           (int(float(1.0 - angle / 180.0) * testWidth), int(testHeight / 2)),
                           10, (0, 0, 225), -1)
                SiamfcVid.write(finalImages[j])
                cv2.imwrite(frame_name_list[j][0:-4] + '_siamfc.png', finalImages[j])
                fname = fname + 1
            # break
    f.close()
    SiamfcVid.release()
    return
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            images_arr, gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, filename,
                                             image, templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                    compile_results(gt, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
    else:
        images_arr, gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image, templates_z,
                                scores, evaluation.start_frame)
        num_frames = np.size(frame_name_list)
        # parameters for Lucas-Kanade optical flow
        lk_params = dict(winSize=(5, 5), maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 1, 0.03))
        bboxes_final = np.zeros((num_frames, 4))
        for i in range(1, len(images_arr) - 1):
            # Create some random colors
            color = np.random.randint(0, 255, (100, 3))
            # Take first frame and find corners in it
            # ret, old_frame = cap.read()
            frame = images_arr[i + 1]
            old_frame = images_arr[i]
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = np.zeros((1, 1, 2), dtype=np.float32)
            bbox_i = bboxes[i]
            c, r, w, h = int(bbox_i[0]), int(bbox_i[1]), int(bbox_i[2]), int(bbox_i[3])
            p0[0, 0, 0] = c
            p0[0, 0, 1] = r
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
            good_new = p1[st == 1]
            bboxes_final[i, :] = p1[0][0][0], p1[0][0][1], w, h
            good_old = p0[st == 1]
            # k avoids shadowing the outer loop index i
            for k, (new, old) in enumerate(zip(good_new, good_old)):
                a, b = new.ravel()
                c, d = old.ravel()
                fig = plt.figure(1)
                ax = fig.add_subplot(111)
                r1 = patches.ConnectionPatch((a, b), (c, d), 'data', 'data', arrowstyle="-|>")
                r2 = patches.Circle((a, b), 5, color='r')
                ax.imshow(np.uint8(frame))
                ax.add_patch(r2)
                ax.add_patch(r1)
                plt.ion()
                plt.show()
                plt.pause(0.001)
                plt.clf()
            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1, 1, 2)

        _, precision, precision_auc, iou = _compile_results(gt, bboxes_final,
                                                            evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
def __init__(self, image_path, region):
    # Parse the arguments
    self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(mode='siamese')

    # Get first-frame image and ground truth
    self.region = region
    self.pos_x = region.x + region.width / 2
    self.pos_y = region.y + region.height / 2
    self.target_w = region.width
    self.target_h = region.height
    self.bbox = (self.pos_x - self.target_w / 2, self.pos_y - self.target_h / 2,
                 self.target_w, self.target_h)

    # Size of the final score map (upscaled score matrix, where the score matrix is
    # the cross-correlation of the outputs of the two Siamese branches)
    self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1) + 1

    # Initialize the network and load the weights
    self.filename, self.image, self.templates_z, \
        self.templates_x, self.scores, self.scores_original = siam.build_tracking_graph(
            self.final_score_sz, self.design, self.env)

    # Calculate the scale factors
    self.scale_factors = self.hp.scale_step ** np.linspace(
        -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2), self.hp.scale_num)

    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    self.penalty = penalty / np.sum(penalty)

    # Calculate search and target patch sizes
    context = self.design.context * (self.target_w + self.target_h)
    self.z_sz = np.sqrt(np.prod((self.target_w + context) * (self.target_h + context)))
    self.x_sz = float(self.design.search_sz) / self.design.exemplar_sz * self.z_sz

    # Create a TensorFlow session
    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = "1"
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    self.sess = tf.Session(config=config)
    with self.sess.as_default():
        tf.global_variables_initializer().run()

    # Coordinate the loading of image files.
    self.coord = tf.train.Coordinator()
    self.threads = tf.train.start_queue_runners(coord=self.coord)
    self.run_opts = {}

    # Calculate the template for the given region
    image_, self.templates_z_ = self.sess.run(
        [self.image, self.templates_z],
        feed_dict={
            siam.pos_x_ph: self.pos_x,
            siam.pos_y_ph: self.pos_y,
            siam.z_sz_ph: self.z_sz,
            self.filename: image_path,
        })
    return
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Load hyperparameter (hp), evaluation, run, environment (env) and design parameters
    # from the parameters/ directory.
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, filename,
                                             image, templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image, templates_z,
                                scores, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
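
# _compile_results is referenced throughout these scripts but not shown; the sketch
# below is an assumption about the metrics such OTB-style evaluation code usually
# reports: center-location precision at a pixel threshold, AUC of the precision
# curve, and mean IoU, all scaled to percentages. Thresholds and scaling are guesses.
import numpy as np

def _compile_results(gt, bboxes, dist_threshold, n_thresholds=50):
    gt4 = np.asarray([region_to_bbox(g, center=False) for g in gt])  # (x, y, w, h)
    centers_gt = gt4[:, :2] + gt4[:, 2:] / 2
    centers = bboxes[:, :2] + bboxes[:, 2:] / 2
    dist = np.linalg.norm(centers - centers_gt, axis=1)

    # precision: fraction of frames whose center error is within dist_threshold
    precision = 100.0 * np.mean(dist <= dist_threshold)
    thresholds = np.linspace(1, 25, n_thresholds)
    precision_auc = 100.0 * np.mean([np.mean(dist <= t) for t in thresholds])

    # mean IoU of predicted vs. ground-truth boxes
    x1 = np.maximum(bboxes[:, 0], gt4[:, 0])
    y1 = np.maximum(bboxes[:, 1], gt4[:, 1])
    x2 = np.minimum(bboxes[:, 0] + bboxes[:, 2], gt4[:, 0] + gt4[:, 2])
    y2 = np.minimum(bboxes[:, 1] + bboxes[:, 3], gt4[:, 1] + gt4[:, 3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    union = bboxes[:, 2] * bboxes[:, 3] + gt4[:, 2] * gt4[:, 3] - inter
    iou = 100.0 * np.mean(inter / union)
    return len(bboxes), precision, precision_auc, iou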
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments(mode="conv2")

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * design.score_sz

    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        # videos_list = videos_list[91:][:]  # only use vot2016
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        success_auc = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            # gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video(env, evaluation, videos_list[i])
            gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video_OTB(
                env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx], success_auc[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Success AUC: ' + "%.2f" % success_auc[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()

        tot_frames = np.mean(lengths)
        mean_precision = np.mean(precisions)
        mean_precision_auc = np.mean(precisions_auc)
        mean_iou = np.mean(ious)
        mean_success_auc = np.mean(success_auc)
        mean_speed = np.mean(speed)
        print('data set ' + evaluation.dataset +
              ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f'
              % (hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr, hp.window_influence))
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Success AUC: ' + "%.2f" % mean_success_auc +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()
        with open('log_test.txt', 'a+') as f:
            f.write(time.asctime(time.localtime(time.time())) + '\r\n')
            f.write('data set ' + evaluation.dataset +
                    ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f \r\n'
                    % (hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr, hp.window_influence))
            f.write('-- Overall stats (averaged per frame) on ' + str(nv) +
                    ' videos (' + str(tot_frames) + ' frames) --\r\n')
            f.write(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
                    ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
                    ' -- IOU: ' + "%.2f" % mean_iou +
                    ' -- AUC: ' + "%.3f" % mean_success_auc +
                    ' -- Speed: ' + "%.2f" % mean_speed + ' --\r\n')
            f.write('\r\n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
        # the graph here is built without a filename placeholder, so the call matches
        # the one in the branch above (the original passed an undefined `filename`)
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
def main():
    # Command-line arguments
    parser = argparse.ArgumentParser(description="Run WIND Project")
    parser.add_argument('-c', '--clear', action='store_true', default=False,
                        help='Delete old camera data')
    parser.add_argument('-ny', '--noYolo', action='store_true', default=False,
                        help='Do not use Yolo')
    parser.add_argument('-ns', '--noSiamfc', action='store_true', default=False,
                        help='Do not use Siamfc')
    parser.add_argument('-nv', '--noVideo', action='store_true', default=False,
                        help='Do not generate video')
    parser.add_argument('-dl', '--dataLocation', choices=['fromFile', 'fromCamera'],
                        help='Use live camera or folder of images')
    parser.add_argument('-cn', '--cameraNumber', default=0, help='Camera number to use')
    parser.add_argument('-ff', '--filesFolder', default="")
    parser.add_argument('-rf', '--refreshRate', default=10, help='Refresh rate for siamfc')
    parser.add_argument('-t', '--timeRecording', default=3,
                        help='Seconds to record from camera')

    # Assign command-line arguments to global variables
    global cameraNumber
    global doYolo
    global doSiamfc
    global genVideos
    global refreshRate
    global liveFeed
    args = parser.parse_args()
    cameraNumber = int(args.cameraNumber)
    doYolo = not args.noYolo
    doSiamfc = not args.noSiamfc
    genVideos = not args.noVideo
    refreshRate = int(args.refreshRate)
    liveFeed = args.dataLocation == 'fromCamera'
    VideoLength = int(args.timeRecording)

    # Command-line argument error checking
    # Clearing out old data in the CameraData folder
    if args.clear:
        for oldData in os.listdir('CameraData'):
            filep = os.path.join('CameraData', oldData)
            if os.path.isfile(filep):
                os.remove(filep)
                print('Deleted: ' + filep)
            elif os.path.isdir(filep):
                for reallyOldData in os.listdir(filep):
                    newfilep = os.path.join(filep, reallyOldData)
                    if os.path.isfile(newfilep):
                        os.remove(newfilep)
                os.rmdir(filep)
                print('Deleted: ' + filep + '/')
        print('Done Deleting')

    # Check that the file folder is valid
    if args.dataLocation == 'fromFile':
        if os.path.isdir(args.filesFolder):
            dirList = [os.path.join(args.filesFolder, d) for d in os.listdir(args.filesFolder)
                       if os.path.isdir(os.path.join(args.filesFolder, d))]
            dirList.sort()
            testImage = Image.open(os.path.join(dirList[0], os.listdir(dirList[0])[0]))
            testWidth, testHeight = testImage.size
        else:
            if args.filesFolder == "":
                print("No folder location was given")
            else:
                print(args.filesFolder + " is not a valid file location")
            return
    # Check that the camera is valid
    else:
        try:
            cam = cv2.VideoCapture(cameraNumber)
            ret, testImage = cam.read()
            testHeight, testWidth = testImage.shape[:2]
            cam.release()
        except:
            print("Camera number given is not valid or connected")
            return

    # Initialize object detector
    database.net = load_net(b"YoloConfig/yolov3-tiny.cfg", b"YoloConfig/yolov3-tiny.weights", 0)
    database.meta = load_meta(b"YoloConfig/coco.data")

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores, graph, scfg = siam.build_tracking_graph(
        final_score_sz, design, env)
    # sFCgraph = siamfcGraph(filename, image, templates_z, scores)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    finalImages = []
    plt.xticks([]), plt.yticks([])
    YoloVid = cv2.VideoWriter('Yolov3Vid.avi', fourcc, 10, (testWidth, testHeight))
    SiamfcVid = cv2.VideoWriter('SiamfcVid.avi', fourcc, 10, (testWidth, testHeight))
    i = 0
    notDone = True
    now = datetime.datetime.now()
    if args.dataLocation == 'fromCamera':
        haha = threading.Thread(target=getImages,
                                args=('CameraData/', 10, VideoLength, database, now, graph, scfg))
        haha.start()
        haha.join()
        frame_name_list = _init_video('CameraData/%d_%d_%d/' % (now.hour, now.minute, now.second))
        while (notDone and i < VideoLength and
               datetime.datetime.now() < now + datetime.timedelta(seconds=20 + VideoLength) and
               genVideos):
            try:
                if os.path.isdir(os.path.join(
                        'CameraData', '%d_%d_%d' % (now.hour, now.minute, now.second + i))):
                    showYoloResult(os.path.join(
                        'CameraData', '%d_%d_%d' % (now.hour, now.minute, now.second + i)),
                        YoloVid, False)
                    showSiamFCResult(os.path.join(
                        'CameraData', '%d_%d_%d' % (now.hour, now.minute, now.second + i)),
                        SiamfcVid, True)
                    i = i + 1
                else:
                    time.sleep(.2)
            except (KeyboardInterrupt, SystemExit):
                notDone = False
    else:
        for dirName in dirList:
            if doYolo:
                runYolo(dirName, database, fourcc, testWidth, testHeight, graph, scfg)
                if genVideos:
                    showYoloResult(dirName, YoloVid, False)
            if doSiamfc and dirName == dirList[0]:
                runSiamfc(dirName, fourcc, testWidth, testHeight, graph, scfg)
                if genVideos:
                    showSiamFCResult(dirName, SiamfcVid, True)
    # haha.join()
    YoloVid.release()
    SiamfcVid.release()
    return
        'z_target_h': tf.train.Feature(float_list=tf.train.FloatList(value=[z_target_h])),
        'x_pos_x': tf.train.Feature(float_list=tf.train.FloatList(value=[x_pos_x])),
        'x_pos_y': tf.train.Feature(float_list=tf.train.FloatList(value=[x_pos_y])),
        'x_target_w': tf.train.Feature(float_list=tf.train.FloatList(value=[x_target_w])),
        'x_target_h': tf.train.Feature(float_list=tf.train.FloatList(value=[x_target_h]))
    }))
    writer.write(example.SerializeToString())
    writer.close()
    print("Writer closed.")
    print(tfrecord_name + '.tfrecords' + " is written to " + output_directory)


if __name__ == "__main__":
    hp, evaluation, run, env, design = parse_arguments()
    transform2tfrecord("shuffled_data_list.txt", "training_dataset", "tfrecords",
                       resize_width=design.resize_width, resize_height=design.resize_height)
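
# A minimal reader-side sketch for the records written above, assuming TF 1.x.
# Only the float keys are visible in this fragment; any image payload keys the
# writer also stores (e.g. encoded crops) would need matching entries here.
import tensorflow as tf

def parse_example(serialized):
    # maps one serialized tf.train.Example back to a dict of scalar floats
    features = tf.parse_single_example(
        serialized,
        features={
            'z_target_h': tf.FixedLenFeature([], tf.float32),
            'x_pos_x': tf.FixedLenFeature([], tf.float32),
            'x_pos_y': tf.FixedLenFeature([], tf.float32),
            'x_target_w': tf.FixedLenFeature([], tf.float32),
            'x_target_h': tf.FixedLenFeature([], tf.float32),
        })
    return features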
def main_camera():
    cam = cv2.VideoCapture(0)
    if not cam.isOpened():
        exit()
    bboxes = np.zeros((10, 4))

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]      (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_cam(final_score_sz, design, env)

    ret, frame = cam.read()
    print(frame.dtype)
    roi = get_roi(frame)
    pos_x, pos_y, target_w, target_h = convert_roi(roi[0][0], roi[0][1])
    # pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2), hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

        # TODO: convert roi[0] to the silly siam format
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                image: frame,
            })
        new_templates_z_ = templates_z_

        t_start = time.time()
        num_frames = 0
        # Get an image from the queue
        while True:
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            ret, frame = cam.read()
            num_frames += 1
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    image: frame,
                }, **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                                   design.tot_stride, design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            out = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_,
                    })
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) + \
                               hp.z_lr * np.asarray(new_templates_z_)
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]

            key = 0
            if run.visualization:
                key = show_frame(image_, out)
            t_elapsed = time.time() - t_start
            speed = num_frames / t_elapsed
            if key == 120:
                print("Speed", speed)
                sess.close()
                cv2.destroyAllWindows()
                exit()
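
# _update_target_position is defined elsewhere; this sketch follows the logic of the
# reference siamfc-tf implementation: map the argmax of the upsampled score map back
# to a displacement in frame coordinates and shift the target position accordingly.
import numpy as np

def _update_target_position(pos_x, pos_y, score, final_score_sz, tot_stride,
                            search_sz, response_up, x_sz):
    # position of the score maximum, as (row, col)
    p = np.asarray(np.unravel_index(np.argmax(score), np.shape(score)))
    center = float(final_score_sz - 1) / 2
    # displacement from the center of the upsampled score map
    disp_in_area = p - center
    # ... in the search crop (undo the response upsampling, apply the network stride)
    disp_in_xcrop = disp_in_area * float(tot_stride) / response_up
    # ... in the original frame (the search crop covers x_sz pixels of the frame)
    disp_in_frame = disp_in_xcrop * x_sz / search_sz
    pos_y, pos_x = pos_y + disp_in_frame[0], pos_x + disp_in_frame[1]
    return pos_x, pos_y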
def main():
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # --- Start streaming from live video ---
    stream_path = "/home/hugogermain/stream.flv"
    cap = cv2.VideoCapture(stream_path)
    start_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # start at the last frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 15)
    ret, frame = cap.read()
    if not ret:
        print("Error opening video sequence")

    # --- Save video (optional) ---
    vid_write = cv2.VideoWriter(env.root_sequences + '/stream_out.avi',
                                cv2.VideoWriter_fourcc(*'MJPG'), 25,
                                (frame.shape[1], frame.shape[0]), True)

    # --- Initialize projection maps ---
    e2s = equirect2stereograph(-2.5, frame, 0, 0)

    # ===================================
    # --- Define initial bounding box ---
    # ===================================
    BB = click_and_crop(e2s.project(frame), design.window_name)
    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)
    cv2.imshow(design.window_name, e2s.project(frame))
    cv2.waitKey(1)
    while True:
        ret, frame = cap.read()
        cv2.waitKey(1)
        if ret:
            # --- Equirectangular to stereographic projection ---
            BB.img = e2s.project(frame)
            BB.refresh()
            # --- Reset to the last frame to avoid cumulative lag ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            start_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # start at the last frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)
            start_frame += 1
        else:
            # --- Reached end of file, wait for new frames ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)

        # --- Rotate camera viewpoint ---
        k = cv2.waitKey(33)
        if k == 119:  # w
            e2s.set_lat(e2s.lat + 10)
        if k == 115:  # s
            e2s.set_lat(e2s.lat - 10)
        if k == 100:  # d
            e2s.set_roll(e2s.roll + 10)
        if k == 97:   # a
            e2s.set_roll(e2s.roll - 10)
        # --- Selection is done ---
        if k == 113:  # q
            break
        if BB.ready:
            break
    print("[INFO]: Bounding Box Selection: Done")

    # ----- Define initial bounding box params & template -----
    pos_x = int((BB.refPt[0][0] + BB.refPt[1][0]) / 2)    # template center x
    pos_y = int((BB.refPt[0][1] + BB.refPt[1][1]) / 2)    # template center y
    target_w = int(abs(BB.refPt[1][0] - BB.refPt[0][0]))  # template width
    target_h = int(abs(BB.refPt[1][1] - BB.refPt[0][1]))  # template height

    # ===========================
    # ----- Begin tracking -----
    # ===========================
    live_tracker(hp, run, design, pos_x, pos_y, target_w, target_h, final_score_sz,
                 templates_z, scores, cap, vid_write, frame, stream_path, e2s)

    cap.release()
    cv2.destroyAllWindows()
    if run.save_video:
        vid_write.release()
def main():
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    print("final_score_sz is: %d" % final_score_sz)

    gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_path)
    num_frames = np.size(frame_name_list)

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2), hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[start_frame])
    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    scaled_exemplar = z_sz * scale_factors
    scaled_search_area = x_sz * scale_factors
    scaled_target_w = target_w * scale_factors
    scaled_target_h = target_h * scale_factors

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # search sizes
    x_sz0_ph = scaled_search_area[0]
    x_sz1_ph = scaled_search_area[1]
    x_sz2_ph = scaled_search_area[2]

    image = Image.open(frame_name_list[0])
    image.show()
    image = np.array(image)

    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = np.mean(image, axis=(0, 1))
    else:
        avg_chan = None

    # pad the frame if necessary
    frame_padded_z, npad_z = pad_frame_numpy(image, frame_sz, pos_y, pos_x, z_sz, avg_chan)
    # extract tensor of z_crops
    # print(type(design.exemplar_sz))
    z_crops = extract_crops_z_numpy(frame_padded_z, npad_z, pos_y, pos_x, z_sz,
                                    design.exemplar_sz)
    print('the shape of the z_crops image is: ' + str(np.shape(z_crops)))
    z_crops = np.squeeze(z_crops)
    img = Image.fromarray(z_crops.astype('uint8'), 'RGB')
    img.show()

    frame_padded_x, npad_x = pad_frame_numpy(image, frame_sz, pos_y, pos_x, x_sz2_ph, avg_chan)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x_numpy(frame_padded_x, npad_x, pos_y, pos_x,
                                    x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)
    print('the shape of the x_crops image is: ' + str(np.shape(x_crops)))
    x_crops_1 = np.squeeze(x_crops[0, :, :])
    img_1 = Image.fromarray(x_crops_1.astype('uint8'), 'RGB')
    img_1.show()
    x_crops_2 = np.squeeze(x_crops[1, :, :])
    img_2 = Image.fromarray(x_crops_2.astype('uint8'), 'RGB')
    img_2.show()
    x_crops_3 = np.squeeze(x_crops[2, :, :])
    img_3 = Image.fromarray(x_crops_3.astype('uint8'), 'RGB')
    img_3.show()
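
# pad_frame_numpy is not shown in this section; the sketch below is an assumption
# based on the TF pad_frame helper in siamfc-tf: pad the frame symmetrically by just
# enough that a patch_sz crop centered at (pos_x, pos_y) stays inside the image,
# filling with the per-channel mean when avg_chan is given. frame_sz is assumed to
# be (height, width).
import numpy as np

def pad_frame_numpy(im, frame_sz, pos_y, pos_x, patch_sz, avg_chan):
    c = patch_sz / 2.0
    # how far the crop would stick out on each side
    xleft_pad = int(max(0, -np.round(pos_x - c)))
    ytop_pad = int(max(0, -np.round(pos_y - c)))
    xright_pad = int(max(0, np.round(pos_x + c) - frame_sz[1]))
    ybottom_pad = int(max(0, np.round(pos_y + c) - frame_sz[0]))
    npad = max(xleft_pad, ytop_pad, xright_pad, ybottom_pad)
    pad_2d = ((npad, npad), (npad, npad))
    if avg_chan is not None:
        # pad each channel with its own mean value
        padded = np.stack([np.pad(im[..., k], pad_2d, mode='constant',
                                  constant_values=avg_chan[k])
                           for k in range(im.shape[-1])], axis=-1)
    else:
        padded = np.pad(im, pad_2d + ((0, 0),), mode='constant')
    return padded, npad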
def main():
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)
    if torch.cuda.is_available():
        siam = siam.cuda()

    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        video_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.']
        video_list.sort()
        nv = np.size(video_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precision_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frame = _init_video(env, evaluation, video_list[i])
            starts = np.rint(np.linspace(0, n_frame - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                starts_frame = int(starts[j])
                gt_ = gt[starts_frame:, :]
                # slice into a fresh name so frame_name_list is not shrunk
                # cumulatively across subsequences
                frame_name_list_ = frame_name_list[starts_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, siam,
                                             starts_frame)
                lengths[idx], precisions[idx], precision_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + video_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precision_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --\n')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precision_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print(' -- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --\n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, siam, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --\n')
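
# region_to_bbox is used throughout these scripts but not defined here; a sketch
# consistent with SiamFC evaluation code (an assumption, not the original helper):
# a 4-number region is an axis-aligned (x, y, w, h) box, an 8-number region is a
# VOT-style polygon converted to an axis-aligned box of equivalent area.
import numpy as np

def region_to_bbox(region, center=True):
    if len(region) == 4:
        x, y, w, h = region
        if center:
            return x + w / 2, y + h / 2, w, h  # (cx, cy, w, h)
        return x, y, w, h
    # polygon given as 4 corner points (x1, y1, ..., x4, y4)
    cx, cy = np.mean(region[::2]), np.mean(region[1::2])
    x1, x2 = np.min(region[::2]), np.max(region[::2])
    y1, y2 = np.min(region[1::2]), np.max(region[1::2])
    A1 = np.linalg.norm(np.array(region[0:2]) - np.array(region[2:4])) * \
         np.linalg.norm(np.array(region[2:4]) - np.array(region[4:6]))
    A2 = (x2 - x1) * (y2 - y1)
    s = np.sqrt(A1 / A2)  # shrink the enclosing box to preserve the polygon area
    w, h = s * (x2 - x1) + 1, s * (y2 - y1) + 1
    if center:
        return cx, cy, w, h
    return cx - w / 2, cy - h / 2, w, h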
def main(argv):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    hp, evaluation, env, design = parse_arguments(root_dir)
    cmd_args = parse_command_line_arguments()
    if 'otb13' in cmd_args.dataset_name:
        dataset_type = 'otb13'
    elif 'otb15' in cmd_args.dataset_name:
        dataset_type = 'otb15'
    elif 'vot16' in cmd_args.dataset_name:
        dataset_type = 'vot16'
    elif 'vot17' in cmd_args.dataset_name:
        dataset_type = 'vot17'
    else:
        # fail fast instead of leaving dataset_type undefined
        raise ValueError('unsupported dataset: %s' % cmd_args.dataset_name)
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_x, templates_z, scores_list = \
        siam.build_tracking_graph(root_dir, final_score_sz, design, env, hp)
    # iterate through all videos of dataset_name
    videos_folder = os.path.join(root_dir, env.root_dataset,
                                 cmd_args.dataset_name)
    videos_list = [
        v for v in os.listdir(videos_folder)
        if os.path.isdir(os.path.join(videos_folder, v))
    ]
    videos_list.sort()
    nv = np.size(videos_list)
    speed = np.zeros(nv * evaluation.n_subseq)
    precisions = np.zeros(nv * evaluation.n_subseq)
    precisions_auc = np.zeros(nv * evaluation.n_subseq)
    ious = np.zeros(nv * evaluation.n_subseq)
    lengths = np.zeros(nv * evaluation.n_subseq)
    successes = np.zeros(nv * evaluation.n_subseq)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        # skip the post-normalization variables, which are not in the checkpoint
        vars_to_load = []
        for v in tf.global_variables():
            if 'postnorm' not in v.name:
                vars_to_load.append(v)
        siam_ckpt_name = 'pretrained/siam_mcf.ckpt-50000'
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(sess, siam_ckpt_name)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                videos_list[i], videos_folder, dataset_type)
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = track_one_sequence(
                    hp, design, frame_name_list_, pos_x, pos_y, target_w,
                    target_h, final_score_sz, filename, image, templates_x,
                    templates_z, scores_list, videos_list[i], dataset_type,
                    sess, cmd_args.visualize, cmd_args.save_images,
                    cmd_args.save_bboxes, vot_handle=None, gt=gt_)
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx], successes[idx]) = _compile_results(
                     gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Success@0.5: ' + "%.2f" % successes[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
    tot_frames = np.sum(lengths)
    mean_precision = np.sum(precisions * lengths) / tot_frames
    mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
    mean_iou = np.sum(ious * lengths) / tot_frames
    mean_speed = np.sum(speed * lengths) / tot_frames
    mean_success = np.sum(successes * lengths) / tot_frames
    print('-- Overall stats (averaged per frame) on ' + str(nv) +
          ' videos (' + str(tot_frames) + ' frames) --')
    print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
          "%.2f" % mean_precision +
          ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
          ' -- IOU: ' + "%.2f" % mean_iou +
          ' -- Success@0.5: ' + "%.2f" % mean_success +
          ' -- Speed: ' + "%.2f" % mean_speed + ' --')
def evaluate():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build the computational graph of the Siamese fully-convolutional network
    siamNet = siam.Siamese(batch_size=1)
    # get tensors that will be used during tracking
    image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary = \
        siamNet.build_tracking_graph_train(final_score_sz, design, env, hp)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv)
        precisions = np.zeros(nv)
        precisions_auc = np.zeros(nv)
        ious = np.zeros(nv)
        lengths = np.zeros(nv)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            gt_ = gt[0:, :]
            frame_name_list_ = frame_name_list[0:]
            # the gt coordinates are the bottom-left corner of the bbox
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
            idx = i
            bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                         pos_x, pos_y, target_w, target_h,
                                         final_score_sz, image, templates_z,
                                         scores,
                                         path_ckpt=os.path.join(
                                             design.saver_folder,
                                             design.path_ckpt),
                                         siamNet=siamNet)
            lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                _compile_results(gt_, bboxes, evaluation.dist_threshold)
            print(str(i) + ' -- ' + videos_list[i] +
                  ' -- Precision: ' + "%.2f" % precisions[idx] +
                  ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                  ' -- IOU: ' + "%.2f" % ious[idx] +
                  ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        # return the per-frame means so the function yields results in both branches
        return mean_precision, mean_precision_auc, mean_iou, mean_speed
    # evaluate only one video
    else:
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[0])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x,
                                pos_y, target_w, target_h, final_score_sz,
                                image, templates_z, scores,
                                path_ckpt=os.path.join(design.saver_folder,
                                                       design.path_ckpt),
                                siamNet=siamNet)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        return precision, precision_auc, iou, speed
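# The align_corners arithmetic behind final_score_sz, matching the [1 4 7]
# example in the comments above: upsampling a length-n signal by a factor r
# with corners aligned yields r * (n - 1) + 1 samples, so the original points
# land exactly on output points. The upsampling factor here is illustrative.
import numpy as np

score = np.array([1., 4., 7.])       # n = 3
r = 3                                # response_up (illustrative)
final_sz = r * (len(score) - 1) + 1  # 3 * (3 - 1) + 1 = 7
up = np.interp(np.linspace(0, len(score) - 1, final_sz),
               np.arange(len(score)), score)
print(up)  # [1. 2. 3. 4. 5. 6. 7.]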
def __init__(self, imagepath, region):
    self.track_count = 0
    # parameters
    self.exemplar_sz = 128
    self.search_sz = 256
    # parse the arguments
    self.hp, self.evaluation, self.run, self.env, self.design = \
        parse_arguments(mode='color')
    # first-frame image and ground truth
    self.region = region
    self.pos_x = region.x + region.width / 2
    self.pos_y = region.y + region.height / 2
    self.target_w = region.width
    self.target_h = region.height
    # size of the final score map (the upscaled score matrix, where the score
    # matrix is the correlation of the two branches of the Siamese network)
    self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1)
    # scale factors
    self.scale_factors = self.hp.scale_step**np.linspace(
        -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2),
        self.hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    self.penalty = penalty / np.sum(penalty)
    # search and target patch sizes
    context = self.design.context * (self.target_w + self.target_h)
    self.z_sz = np.sqrt(
        np.prod((self.target_w + context) * (self.target_h + context)))
    self.x_sz = float(
        self.design.search_sz) / self.design.exemplar_sz * self.z_sz
    # initialize the network
    self.features_x, self.features_z, self.scores, self.z_crops, self.x_crops = \
        self.InitNetwork()
    latest_checkpoint = "/media/engin/63c43c7a-cb63-4c43-b70c-f3cb4d68762a/models/wbaek_colorization/model1_18022020/model.ckpt-56000"
    config1 = tf.ConfigProto()
    config1.gpu_options.visible_device_list = "1"
    config1.gpu_options.per_process_gpu_memory_fraction = 0.45
    # load the model for the search branch
    with self.graph_search.as_default():
        self.session_search = tf.Session(graph=self.graph_search,
                                         config=config1)
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(self.session_search, latest_checkpoint)
    # load the model for the exemplar branch
    with self.graph_exemplar.as_default():
        self.session_exemplar = tf.Session(graph=self.graph_exemplar,
                                           config=config1)
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(self.session_exemplar, latest_checkpoint)
    config2 = tf.ConfigProto()
    config2.gpu_options.visible_device_list = "1"
    config2.gpu_options.per_process_gpu_memory_fraction = 0.1
    # create a session for the matching branch
    with self.graph_match.as_default():
        self.session_match = tf.Session(graph=self.graph_match,
                                        config=config2)
    # compute the template features by running the exemplar session
    with self.graph_exemplar.as_default():
        self.templates_z_, z_crops_ = self.session_exemplar.run(
            [self.features_z, self.z_crops],
            feed_dict={
                self.exemplar_ph['filename_ph']: imagepath,
                self.exemplar_ph['pos_x_ph']: self.pos_x,
                self.exemplar_ph['pos_y_ph']: self.pos_y,
                self.exemplar_ph['z_sz_ph']: self.z_sz
            })
    # write the template image to disk
    z_crops_image = Image.fromarray(
        np.reshape(z_crops_, (128, 128)).astype(np.uint8))
    z_crops_image.save("/home/engin/Documents/output/template.jpg")
    return
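# A quick look at the scale pyramid built in __init__ above:
# scale_step ** linspace(...) yields geometrically spaced factors around 1,
# one per searched scale. The scale_step and scale_num values below are
# hypothetical, not taken from the configuration files.
import numpy as np

scale_step, scale_num = 1.04, 3
scale_factors = scale_step**np.linspace(-np.ceil(scale_num / 2),
                                        np.ceil(scale_num / 2), scale_num)
print(scale_factors)  # approx. [0.925 1.    1.082]: shrink, keep, grow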
def main():
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)
                       if not v[0] == '.']
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):  # iterate through all videos
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):  # iterate within one video
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                # bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                #                              pos_x, pos_y, target_w, target_h,
                #                              final_score_sz, filename, image,
                #                              templates_z, scores, start_frame)
                # here is where tracker.py is called
                bboxes, speed[idx] = tracker(hp, run, design,
                                             frame_name_list_, pos_x, pos_y,
                                             target_w, target_h,
                                             final_score_sz, siam,
                                             start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --\n')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --\n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x,
        #                         pos_y, target_w, target_h, final_score_sz,
        #                         filename, image, templates_z, scores,
        #                         evaluation.start_frame)
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x,
                                pos_y, target_w, target_h, final_score_sz,
                                siam, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --\n')
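# Every variant above relies on region_to_bbox to turn a ground-truth region
# into a centre position plus width and height. A hypothetical minimal
# version for the plain axis-aligned case, (x, y, w, h) with (x, y) the
# top-left corner; the real helper in the repository may also handle rotated
# VOT polygons, which this sketch does not:
import numpy as np

def region_to_bbox_sketch(region):
    x, y, w, h = region
    pos_x = x + w / 2
    pos_y = y + h / 2
    return pos_x, pos_y, w, h

print(region_to_bbox_sketch(np.array([10., 20., 40., 60.])))
# (30.0, 50.0, 40.0, 60.0)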