Example #1
    def build_sot(self):
        # load config
        cfg.merge_from_file(self.args.config)
        cfg.CUDA = torch.cuda.is_available()
        device = torch.device('cuda' if cfg.CUDA else 'cpu')

        # create model
        model = ModelBuilder()

        # load model
        model.load_state_dict(torch.load(self.args.snapshot,
            map_location=lambda storage, loc: storage.cpu()))
        model.eval().to(device)

        # build tracker
        tracker = build_tracker(model)
        return tracker
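A minimal usage sketch for the tracker returned by build_sot() above, following the init/track pattern used in the later examples; the video path, window name, and ESC handling are illustrative assumptions rather than part of the original snippet:

# Sketch only: assumes `tracker` was built as in build_sot() and that 'demo.mp4' exists.
import cv2

cap = cv2.VideoCapture('demo.mp4')                        # hypothetical input video
ok, frame = cap.read()
init_rect = cv2.selectROI('init', frame, False, False)    # x, y, w, h chosen by the user
tracker.init(frame, init_rect)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    outputs = tracker.track(frame)                        # dict with 'bbox' (and 'best_score')
    x, y, w, h = map(int, outputs['bbox'])
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('tracking', frame)
    if cv2.waitKey(40) & 0xFF == 27:                      # ESC to quit
        break
cap.release()
cv2.destroyAllWindows()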
Example #2
def test_snapshot(epoch: int, snapshot: str, test_path: str):
    # model
    max_img = 8
    model = ModelBuilder()
    data = torch.load(snapshot,
                      map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(data['state_dict'])
    model.eval().to(torch.device('cpu'))
    tracker = build_tracker(model)

    root = cfg.DATASET.COCO.ROOT
    cur_path = os.path.dirname(os.path.realpath(__file__))
    root = os.path.join(cur_path, '../../', root)
    anno_path = os.path.join(root, '../', "val2017.json")
    with open(anno_path, 'r') as f:
        anno = json.load(f)
        anno = filter_zero(anno)
    dataset = os.path.join(root, "val2017")
    folder = random.choice(glob.glob(f"{dataset}/**"))
    zs = glob.glob(f"{folder}/*.z.jpg")
    xs = glob.glob(f"{folder}/*.x.jpg")

    zs = sorted(zs)
    xs = sorted(xs)

    xs = [(x, get_anno_from_img_path(anno, x)) for x in xs]

    for i in range(len(zs[:max_img])):
        z = cv2.imread(zs[i])
        x_path, bbox = xs[i]
        x = cv2.imread(x_path)
        tracker.init_(z)
        cls, (x1, y1, x2, y2) = tracker.track(x)
        cv2.rectangle(x, (x1, y1), (x2, y2), (255, 0, 0), 2)
        a1, b1, a2, b2 = bbox
        cv2.rectangle(x, (a1, b1), (a2, b2), (0, 0, 255), 2)
        cv2.putText(x, 'Acc: ' + cls.astype('str'), (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        parent_dir = f"{test_path}/{os.path.basename(Path(zs[i]).parent)}"
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(x_path)}", x)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(zs[i])}", z)
Example #3
    def __init__(self):
        self.init_rect = None

        self.pysot_pub = rospy.Publisher(config.TRACK_PUB_TOPIC,
                                         Int32MultiArray,
                                         queue_size=10)
        self.img_sub = rospy.Subscriber(config.IMAGE_SUB_TOPIC, Image,
                                        self.receive_frame_and_track)
        self.service = rospy.Service("init_rect", InitRect, self.set_init_rect)

        cfg.TRACK.TYPE = config.TRACK_TYPE
        cfg.merge_from_file(config.CONFIG_PATH)
        cfg.CUDA = torch.cuda.is_available()
        device = torch.device('cuda' if cfg.CUDA else 'cpu')
        model = ModelBuilder()
        model.load_state_dict(
            torch.load(config.MODEL_PATH,
                       map_location=lambda storage, loc: storage.cpu()))
        model.eval().to(device)

        self.tracker = build_tracker(model)
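The subscriber above hands each incoming frame to receive_frame_and_track, which is not shown in this snippet. Below is a hypothetical sketch of such a callback, assuming cv_bridge for the Image conversion and the [x, y, w, h] bbox layout that tracker.track() returns in the other examples:

    # Hypothetical sketch (not the original code); requires
    # `from cv_bridge import CvBridge` and `from std_msgs.msg import Int32MultiArray`.
    def receive_frame_and_track(self, msg):
        frame = CvBridge().imgmsg_to_cv2(msg, desired_encoding='bgr8')
        if self.init_rect is None:
            return  # no target selected yet; wait for the init_rect service call
        outputs = self.tracker.track(frame)
        bbox = list(map(int, outputs['bbox']))  # x, y, w, h
        self.pysot_pub.publish(Int32MultiArray(data=bbox))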
Example #4
    def init_track(self):

        # path to the config file
        config_path = './models/siamrpn_alex_dwxcorr/config.yaml'
        # path to the snapshot file
        snapshot_path = './models/siamrpn_alex_dwxcorr/model.pth'

        # merge config parameters
        cfg.merge_from_file(config_path)
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device = torch.device('cuda' if cfg.CUDA else 'cpu')

        # create model
        model = ModelBuilder()

        # load model
        model.load_state_dict(
            torch.load(snapshot_path,
                       map_location=lambda storage, loc: storage.cpu()))
        model.eval().to(device)

        # build tracker
        self.tracker = build_tracker(model)
Example #5
 def __init__(self, parent=None):
     super(MyMainWindow, self).__init__(parent)
     self.isDracula = False
     # Connect the on-clicked functions
     self.pushButton_locationLoading.clicked.connect(self.location_loading)
     self.pushButton_videoLoading.clicked.connect(self.video_loading)
     self.pushButton_cameraLoading.clicked.connect(self.camera_loading)
     self.pushButton_bboxSetting.clicked.connect(self.bbox_setting)
     self.pushButton_algorithmProcessing.clicked.connect(
         self.algorithm_processing)
     self.scrollBar.valueChanged.connect(self.slider_change)
     self.selectBox.valueChanged.connect(self.select_change)
     self.checkBox.stateChanged.connect(self.checkbox_change)
     # Message box ignore
     self.bbox_tips = True
     self.save_tips = True
     # Initialize trackers
     model_location = './pysot/experiments/siammaske_r50_l3'
     self.config = model_location + '/config.yaml'
     self.snapshot = model_location + '/model.pth'
     self.tracker_name = model_location.split('/')[-1]
     self.video_name = ''
     cfg.merge_from_file(self.config)
     cfg.CUDA = torch.cuda.is_available()
     device = torch.device('cuda' if cfg.CUDA else 'cpu')
     model = ModelBuilder()
     model.load_state_dict(
         torch.load(self.snapshot,
                    map_location=lambda storage, loc: storage.cpu()))
     model.eval().to(device)
     self.tracker = build_tracker(model)
     self.vs = None
     self.analysis_box = None
     self.analysis_max = 10
     self.save_location = ''
     self.afterCamera = False
     self.bbox_list_predict = []  # [time][tracker]
Example #6
def run_tracker_pysot(args):

    # load config
    config = f'pysot/experiments/{args.tracker_name}/config.yaml'
    snapshot = f'pysot/experiments/{args.tracker_name}/model.pth'

    cfg.merge_from_file(config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(snapshot, map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.YT_ID:
        video_name = args.YT_ID.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    # cv2.namedWindow(args.YT_ID, cv2.WND_PROP_FULLSCREEN)
    pred_bboxes = []
    for frame in get_frames(args):
        if first_frame:
            try:
                init_rect = np.loadtxt(str(
                    os.path.join(args.path, 'Sequences',
                                 args.YT_ID + '_' + str(args.ID),
                                 'initial_BB.txt')),
                                       delimiter=',',
                                       dtype=np.float64)

            except:
                exit()
            tracker.init(frame, init_rect)
            pred_bboxes.append(init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            bbox = list(map(int, outputs['bbox']))
            pred_bbox = outputs['bbox']
            pred_bboxes.append(pred_bbox)
            # cv2.rectangle(frame, (bbox[0], bbox[1]),
            #                 (bbox[0]+bbox[2], bbox[1]+bbox[3]),
            #                 (0, 255, 0), 3)
            # cv2.imshow(args.YT_ID, frame)

            # cv2.waitKey(40)
    model_path = os.path.join(args.path, 'Sequences',
                              args.YT_ID + '_' + str(args.ID), 'results',
                              args.tracker_name)
    if not os.path.isdir(model_path):
        os.makedirs(model_path)
    result_path = os.path.join(model_path, f'{video_name}.txt')
    with open(result_path, 'w') as f:
        for x in pred_bboxes:
            f.write(','.join([str(i) for i in x]) + '\n')
Example #7
def main():
    # load parameters
    parser = argparse.ArgumentParser(description='tracking demo')
    parser.add_argument('--config', type=str, help='config file', default=config)
    parser.add_argument('--snapshot', type=str, help='model name', default=snapshot)
    parser.add_argument('--video_name', type=str, help='videos or image files', default=video)
    args = parser.parse_args()

    # load config
    cfg.merge_from_file(args.config)
    
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()
    # load model
    model.load_state_dict(torch.load(args.snapshot,
        map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)
    
    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
        imgname = args.video_name.split('/')[-3].split('.')[0]
        imgname2 = imgname.split('_')[-2].split('.')[0]+'_'+imgname.split('_')[-1].split('.')[0]
        print(imgname2)
        print('model:'+param+' video_name:'+ imgname)
    else:
        video_name = 'webcam'
    
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    directory='testing_dataset/result/'+param+'/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    ################################ variable initialization ###################################
    sum = 0
    timer = 0
    num = 0
    gif_images = []  # frames for gif export
    ############################################################################
    for frame in get_frames(args.video_name):
        start = cv2.getTickCount()
        #if num==0:#directory+imgname+".avi"
            #videoWriter = cv2.VideoWriter(directory+imgname+'.avi',cv2.VideoWriter_fourcc("X", "V", "I", "D"),50,(frame.shape[1],frame.shape[0]))#img.shape[1],img.shape[0]
        num=num+1
        if first_frame:
            try:
                sss = 'testing_dataset/rssrai/' + imgname + '/groundtruth.txt'  # change this to your own test video path
                rect = open(sss, 'r')
                data = rect.readline()
                data2 = data.split(',')
                data2 = map(int, data2)
                rect2 = list(data2)
                #init_rect = cv2.selectROI(video_name, frame, False, False)
                init_rect=rect2
                print(init_rect)
                # f=open(directory+imgname+'.txt','w')
                # f.write(str(rect2[0])+','+str(rect2[1])+','+str(rect2[2])+','+str(rect2[3]))
                # f.close()
                # f=open(directory+imgname+'.txt','a')
            except:
                exit()
            
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            end = cv2.getTickCount()
            during = (end - start) / cv2.getTickFrequency()
            timer=timer+during
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0,255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask, mask*255]).transpose(1, 2, 0)
                gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
                ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)

                contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
                c = sorted(contours, key=cv2.contourArea, reverse=True)[0]  # contour with the largest area
                rect_new2 = cv2.boundingRect(c)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                cv2.rectangle(frame, (rect_new2[0], rect_new2[1]),
                               (rect_new2[0]+rect_new2[2], rect_new2[1]+rect_new2[3]),
                               (0, 0, 255), 2)
                # f.write('\n'+str(rect_new2[0])+','+str(rect_new2[1])+','+str(rect_new2[2])+','+str(rect_new2[3]))
                
                #######################################################################################
                # while 1:
                #     if cv2.waitKey(0)==97:
                #         break
                #     elif(cv2.waitKey(0)==27):
                #         return
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0]+bbox[2], bbox[1]+bbox[3]),
                              (0, 255, 0), 2)
                # f.write('\n'+str(bbox[0])+','+str(bbox[1])+','+str(bbox[2])+','+str(bbox[3]))
            cv2.putText(frame, imgname2, (5, 50), cv2.FONT_HERSHEY_COMPLEX, 2.0, (255, 0,0), 2)
            cv2.putText(frame, str(num), (5, 120), cv2.FONT_HERSHEY_COMPLEX, 2.0, (255, 0,0), 2)
            cv2.namedWindow(video_name,0)
            cv2.resizeWindow(video_name,1000,800)
            cv2.imshow(video_name, frame)
            #gif_images.append(frame)
            #videoWriter.write(frame)
            cv2.waitKey(30)
    #imageio.mimsave(directory+imgname+'.gif',gif_images,'GIF',duration = 0.02)#速度太慢
    #f.close()
    fps = int(num / timer)
    print('FPS:%d' % fps)
Example #8
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    stat_time = []
    for frame in get_frames(args.video_name):

        original = frame.copy()
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            cur_time = time.time()
            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                              (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                final_frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                frame_showing = frame.copy()
                cv2.rectangle(frame_showing, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
                x1, y1, x2, y2 = bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]
                frame_output = np.zeros_like(frame)
                frame_output[y1:y2, x1:x2] = 255
                final_frame = cv2.hconcat((frame_output, frame_showing))

            cv2.imshow(video_name, final_frame)
            keyPressed = cv2.waitKey(1) & 0xff

            if keyPressed == 27 or keyPressed == 1048603:
                print('exited the program by pressing ESC')
                break  # esc to quit

            stat_time.append(time.time() - cur_time)
            # print('iteration time = ', time.time()-cur_time)
    print('average iteration time =', np.average(stat_time))
Example #9
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    class_name = get_classname(args.video_name)
    for frame, image_path in get_frames(args.video_name, args.start_index):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = cv2.equalizeHist(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        if first_frame:
            while True:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                    # init_rect = (311, 136, 120, 125)
                except:
                    exit()
                if check_rect(init_rect):
                    break
            tracker.init(frame, init_rect)
            first_frame = False
            labels = [[
                init_rect[0], init_rect[1], init_rect[0] + init_rect[2],
                init_rect[1] + init_rect[3]
            ]]
        else:
            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                              (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
            print(outputs['best_score'])
            if outputs['best_score'] < 0.9:
                first_frame = True
                continue
            labels = [[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]]
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
        labels[0].append(class_name)
        generate_label(labels, frame.shape[:2], path=image_path)
Example #10
            frame = cv2.imread(img)
            yield frame


if __name__ == '__main__':
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)
    # init_frame = np.load('../chainer-pysot/init.npz')['frame']
    # init_rect = np.load('../chainer-pysot/init.npz')['init_rect']
    # second_frame = np.load('../chainer-pysot/output.npz')['second_frame']

    # tracker.init(init_frame, init_rect)
    # np.savez('../chainer-pysot/init.npz',
    #     frame=init_frame, zfs=[f.detach().cpu().numpy() for f in tracker.model.zf], init_rect=init_rect)
    # tracker.track(second_frame)
    # raise ValueError
Example #11
def showImage():

    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)

    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    path = sys.path[0]
    path = path[0:-5] + 'third-party/pysot/'
    cfg.merge_from_file(path +
                        '/experiments/siamrpn_r50_l234_dwxcorr/config.yaml')
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    model = ModelBuilder()
    #model = load_pretrain(model, '/home/develop/ros/src/fly-vision-1/third-party/pysot/pretrained/model.pth').cuda().eval()
    pre = torch.load(path + 'pretrained/model.pth')

    model.load_state_dict(pre)
    model.cuda().eval()
    tracker = build_tracker(model)

    print('ready to start!')

    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10)
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)
    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():

        if getim:
            t1 = time.time()
            idd = readid(image)

            pose = Pose()
            pose.position.z = 0

            if start is False and init is True:
                init_rect = np.array([x1, y1, x2 - x1, y2 - y1])
                tracker.init(image, init_rect)

                start = True
                flag_lose = False
                continue

            if start is True:

                outputs = tracker.track(image)
                bbox = list(map(int, outputs['bbox']))

                res = [int(l) for l in bbox]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)
                pose.position.x = (bbox[0] + bbox[2] / 2 -
                                   image.shape[1] / 2) / (image.shape[1] / 2)
                pose.position.y = (bbox[1] + bbox[3] / 2 -
                                   image.shape[0] / 2) / (image.shape[0] / 2)
                cv2.putText(image, str(outputs['best_score']),
                            (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
                pose.position.z = 1
                if outputs['best_score'] < 0.5:

                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True

            if flag_lose is True:
                cv2.putText(image, 'target is lost!', (200, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
                pose.position.z = -1

            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cv2.putText(image, '#' + str(idd), (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)

            pub.publish(pose)

            if start is True:

                i = i + 1
            if i > 5:
                i = 1
                fps = 5 / (time.time() - t)
                t = time.time()
            cv2.putText(image, 'fps=' + str(fps), (200, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False

        rate.sleep()
Example #12
def main(threshold_correct, tolerance):
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    counter_threshold_correct = 0
    counter_tolerance = 0
    previous_color = "none"
    clf = KNNClassifier(1)
    clf.load(
        "/Users/aussabbood/github/SwaliO/pysot/tools/saved_models/model.pkl")
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                              (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) *
                        255).astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
                frame_excerpt = frame[bbox[1]:(bbox[1] + bbox[3]),
                                      bbox[0]:(bbox[0] + bbox[2])]
                try:
                    pred = clf.predict(frame_excerpt)
                except cv2.error:
                    print("[WARNING] Force none due to error!")
                    pred = ['none']
                print(pred[0])
                if pred[0] == previous_color:
                    counter_threshold_correct += 1
                else:
                    counter_tolerance += 1
                    if counter_tolerance == tolerance:
                        counter_threshold_correct = 0
                        counter_tolerance = 0
                if counter_threshold_correct == 10:
                    print(f"Its a {pred[0]} bottle")
                    counter_threshold_correct = 0
                    counter_tolerance = 0
                previous_color = pred[0]
                print(f"threshold:{counter_threshold_correct}")
                print(f"tollerance:{counter_tolerance}")
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
Example #13
def main():
    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    checkpoint = torch.load(args.snapshot)
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    #multiprocessing
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []
    label = []
    probs = []

    for i in range(10):
        multiProcess.append(build_multitracker(model, label, probs, resQueue))
        multiProcess[i].start()

    first_frame = True
    image_files = sorted(glob.glob('./test/image/*.JPEG'))

    for f, image_file in enumerate(image_files):
        frame = cv2.imread(image_file)

        if first_frame:
            # keyframe need to be uploaded to cloud
            print('first frame')
            key_frame = frame

            payload = {"type": "data", "contents": {"frame": frame}}
            print("####################", payload)
            ecci_client.publish(payload, "cloud")

            cloud_data = ecci_client.get_sub_data_payload_queue().get()
            print("###########recieve data from cloud", cloud_data)
            bbox = cloud_data["bbox"]
            label = cloud_data["label"]
            probs = cloud_data["probs"]
            num_process = len(bbox)

            t_detect_start = time.time()
            for i in range(num_process):
                init_rect = [
                    bbox[i][0], bbox[i][1], bbox[i][2] - bbox[i][0],
                    bbox[i][3] - bbox[i][1]
                ]
                multiProcess[i].init(frame, init_rect, label[i], probs[i])
                cv2.rectangle(frame, (int(bbox[i][0]), int(bbox[i][1])),
                              (int(bbox[i][2]), int(bbox[i][3])), (0, 0, 255),
                              3)
                cv2.putText(frame,
                            '%s: %.3f' % (label[i], probs[i]),
                            (bbox[i][0], bbox[i][1] - 15),
                            cv2.FONT_HERSHEY_PLAIN,
                            1.0, (0, 0, 255),
                            thickness=1)
            t_detect_end = time.time()
            print("detect fps : ", 1 / (t_detect_end - t_detect_start))

            first_frame = False
            index = 1

        elif index % 10 == 0:
            if is_key(key_frame, frame) or index % 20 == 0:
                # keyframe need to be uploaded to cloud ##### outputs, time ######
                print('key frame')
                key_frame = frame

                payload = {"type": "data", "contents": {"frame": frame}}
                print("####################", payload)
                ecci_client.publish(payload, "cloud")

                cloud_data = ecci_client.get_sub_data_payload_queue().get()
                print("###########recieve data from cloud", cloud_data)
                bbox = cloud_data["bbox"]
                label = cloud_data["label"]
                probs = cloud_data["probs"]
                num_process = len(bbox)

                t_detect_start = time.time()
                for i in range(num_process):
                    init_rect = [
                        bbox[i][0], bbox[i][1], bbox[i][2] - bbox[i][0],
                        bbox[i][3] - bbox[i][1]
                    ]
                    multiProcess[i].init(frame, init_rect, label[i], probs[i])
                    cv2.rectangle(frame, (int(bbox[i][0]), int(bbox[i][1])),
                                  (int(bbox[i][2]), int(bbox[i][3])),
                                  (0, 0, 255), 3)
                    cv2.putText(frame,
                                '%s: %.3f' % (label[i], probs[i]),
                                (bbox[i][0], bbox[i][1] - 15),
                                cv2.FONT_HERSHEY_PLAIN,
                                1.0, (0, 0, 255),
                                thickness=1)
                t_detect_end = time.time()
                print("detect fps : ", 1 / (t_detect_end - t_detect_start))

                index = 1
            else:
                print('non-key frame')
                t_track_start = time.time()
                for i in range(num_process):
                    multiProcess[i].track(frame)
                t_track_end = time.time()
                print("track fps : ", 1 / (t_track_end - t_track_start))

                for i in range(num_process):
                    resDict = resQueue.get()
                    print(resDict)
                    bbox = list(map(int, resDict['bbox']))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.putText(frame,
                                '%s: %.3f' % (label[i], probs[i]),
                                (bbox[0], bbox[1] - 15),
                                cv2.FONT_HERSHEY_PLAIN,
                                1.0, (0, 255, 0),
                                thickness=1)
                index += 1

        else:
            print('non-key frame')
            t_track_start = time.time()
            for i in range(num_process):
                multiProcess[i].track(frame)
            t_track_end = time.time()
            print("track fps : ", 1 / (t_track_end - t_track_start))

            for i in range(num_process):
                resDict = resQueue.get()
                print(resDict)
                bbox = list(map(int, resDict['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
                cv2.putText(frame,
                            '%s: %.3f' % (label[i], probs[i]),
                            (bbox[0], bbox[1] - 15),
                            cv2.FONT_HERSHEY_PLAIN,
                            1.0, (0, 255, 0),
                            thickness=1)
            index += 1

        cv2.imwrite('./test/output/%s.jpg' % f, frame)

    for i in range(10):
        multiProcess.append(build_multitracker(model, label, probs, resQueue))
        multiProcess[i].join()
Example #14
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    class_name = get_classname(args.video_name)
    mot_tracker = MOTrackerWrapper(tracker, class_name)

    for frame, image_path in get_frames(args.video_name, args.start_index):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = cv2.equalizeHist(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        if first_frame:
            init_rects = []
            while True:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                    # init_rect = (311, 136, 120, 125)
                except:
                    exit()
                if check_rect(init_rect):
                    init_rects.append(init_rect)
                else:
                    break
            mot_tracker.init(frame, init_rects)
            first_frame = False
        else:
            has_uncertainty = mot_tracker.track(frame)
            labels = mot_tracker.labels

            for bbox in labels:
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (0, 255, 0), 3)
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
            if has_uncertainty or (cv2.waitKey(0) == ord('m')):
                first_frame = True
                continue
        labels = mot_tracker.labels
        if len(labels) == 0:
            continue
        generate_label(labels, frame.shape[:2], path=image_path)
Example #15
def main():
    os.makedirs('outputs/', exist_ok=True)
    video_dir = '/home/pris1/Downloads/clips'
    video_file_list = os.listdir(video_dir)
    vp_to_file_dict = dict()
    for videofile in video_file_list:
        tracker, video, person, sec_start, _, sec_end = os.path.splitext(videofile)[0].split('%')
        if tracker == args.human_tracker:
            vp = video+'.'+person
            vp_to_file_dict[vp] = [videofile, int(sec_start)]

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = True
    device = torch.device('cuda')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load(args.snapshot,
        map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    vp_list = pickle.load(open(args.vp_list, 'rb'))
    proposal_dict = pickle.load(open(args.proposal_dict, 'rb'))
    vp_dict = pickle.load(open(args.vp_dict, 'rb'))
    if args.range[-1] == -1:
        args.range[-1] = len(vp_dict)

    vp_range = vp_list[args.range[0]:args.range[1]]
    for vp_idx, vp in enumerate(vp_range):
        videofile, sec_start = vp_to_file_dict[vp]
        video, person = vp.split('.')
        start_vf = video+'.'+str(sec_start)
        if start_vf in proposal_dict:
            start_length = len(proposal_dict[start_vf])
            for obj_idx in range(start_length):
                print('vp: {} / {}, obj: {} / {}'.format(vp_idx+1, len(vp_range), obj_idx+1, start_length))
                track_id = '%'.join([video, person, str(obj_idx)])
                track_dict = dict()
                start_rect = copy.deepcopy(proposal_dict[start_vf][obj_idx])
                videopath = os.path.join(video_dir, videofile)
                cap = cv2.VideoCapture(videopath)
                fps = int(cap.get(cv2.CAP_PROP_FPS))
                frame_idx = 0
                first_frame = True
                while True:
                    ret, frame = cap.read()
                    if ret:
                        sec = frame_idx // fps + sec_start
                        remained = frame_idx % fps
                        
                        if remained == 0:
                            if first_frame:
                                init_rect = copy.deepcopy(start_rect) # xywh
                                track_dict[sec] = copy.deepcopy(init_rect)
                                init_rect[2] -= init_rect[0]
                                init_rect[3] -= init_rect[1]
                                tracker.init(frame, init_rect)
                                first_frame = False
                            else:
                                outputs = tracker.track(frame)
                                bbox = outputs['bbox'] # xywh
                                bbox[2] += bbox[0]
                                bbox[3] += bbox[1]
                                vf = video+'.'+str(sec)
                                if vf in proposal_dict:
                                    proposals = copy.deepcopy(proposal_dict[vf])
                                    track_results = np.expand_dims(np.array(bbox), axis=0)
                                    iou_mat = calc_iou(track_results, proposals)[0]
                                    max_proposal_idx = np.argmax(iou_mat)
                                    proposal_adjust = copy.deepcopy(proposals[max_proposal_idx])
                                    track_dict[sec] = copy.deepcopy(proposal_adjust)
                                    proposal_adjust[2] -= proposal_adjust[0]
                                    proposal_adjust[3] -= proposal_adjust[1]
                                    tracker.init(frame, proposal_adjust)
                                else:
                                    track_dict[sec] = copy.deepcopy(bbox)
                        else:
                            outputs = tracker.track(frame)

                        frame_idx += 1
                    else:
                        break
                pickle.dump(track_dict, open('outputs/{}.pkl'.format(track_id), 'wb'))
        else:
            pass
Example #16
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    for frame in get_frames(args.video_name):

        # yy
        # plt.imshow(frame)
        # plt.show()
        # os.system("pause")

        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)

            #yy
            #print(init_rect)
            cropRect0 = frame[init_rect[1]:init_rect[1] + init_rect[3],
                              init_rect[0]:init_rect[0] + init_rect[2]]
            #plt.imshow(cropRect0)
            #plt.show()

            first_frame = False
        else:
            outputs = tracker.track(frame)
            #yy
            #print(outputs['bbox'])
            #print(outputs['bbox'][0])
            track_rect = list(map(int, outputs['bbox']))
            cropRectI = frame[track_rect[1]:track_rect[1] + track_rect[3],
                              track_rect[0]:track_rect[0] + track_rect[2]]
            #plt.imshow(cropRectI)
            #plt.show()

            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                              (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
            #yy

            #plt.imshow(frame)
            #plt.show()

            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
Example #17
def main():
    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model   
    checkpoint = torch.load(args.snapshot)
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    #multiprocessing
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []

    # VID dataloader
    dataset = ImagenetDataset(args.dataset, is_val=True)
    true_case_stat, all_gb_boxes = group_annotation_by_class(dataset)

    for f in range(len(dataset)):
        frame, first_frame = dataset.get_image(f)
        if first_frame:        
            # keyframe need to be uploaded to cloud 
            print('first frame upload to cloud')

            # close the last multiprocessing
            for i in range(len(multiProcess)):
                multiProcess[i].join()

            # send frame to cloud
            payload = {"type":"data","contents":{"frame":frame}}
            print("####################",payload)
            ecci_client.publish(payload, "cloud")

            # get rect from cloud
            cloud_data = ecci_client.get_sub_data_payload_queue().get()
            print("###########recieve data from cloud",cloud_data)
            bbox= cloud_data["bbox"]
            label = cloud_data["label"]
            probs = cloud_data["probs"]

            # write txt
            for i in range(len(bbox)):
                write_txt(dataset, f, bbox[i], label[i], probs[i])

            # # start multiprocessing
            multiProcess = []
            for i in range(len(bbox)):
                multiProcess.append(build_multitracker(model,label[i],probs[i],resQueue))
            for i in range(len(multiProcess)):
                init_rect = [bbox[i][0],bbox[i][1],bbox[i][2]-bbox[i][0],bbox[i][3]-bbox[i][1]]
                multiProcess[i].init(frame, init_rect)
                multiProcess[i].start()
                
            key_frame = frame   
            first_frame = False
            index = 1

        # elif is_key(key_frame, frame):
        elif index % 5 == 0:
            if is_key(key_frame, frame) or index % 15 == 0:

                # keyframe need to be uploaded to cloud ##### outputs, time ######
                print('key frame upload to cloud')
            
                # close the last multiprocessing
                for i in range(len(multiProcess)):
                    multiProcess[i].join()

                # send frame to cloud
                payload = {"type":"data","contents":{"frame":frame}}
                print("####################",payload)
                ecci_client.publish(payload, "cloud")

                # get rect from cloud
                cloud_data = ecci_client.get_sub_data_payload_queue().get()
                print("###########recieve data from cloud",cloud_data)
                bbox= cloud_data["bbox"]
                label = cloud_data["label"]
                probs = cloud_data["probs"]

                
                # write txt
                for i in range(len(bbox)):
                    write_txt(dataset, f, bbox[i], label[i], probs[i])

                # # start multiprocessing
                multiProcess = []
                for i in range(len(bbox)):
                    multiProcess.append(build_multitracker(model,label[i],probs[i],resQueue))
                for i in range(len(multiProcess)):
                    init_rect = [bbox[i][0],bbox[i][1],bbox[i][2]-bbox[i][0],bbox[i][3]-bbox[i][1]]
                    multiProcess[i].init(frame, init_rect)
                    multiProcess[i].start()
                
                key_frame = frame
                index = 1
            else:
                print('track locally')
                for i in range(len(multiProcess)):
                    multiProcess[i].track(frame)
                    
                for i in range(len(multiProcess)):
                    resDict = resQueue.get()
                    resDict['bbox'] = [resDict['bbox'][0],resDict['bbox'][1],resDict['bbox'][0]+resDict['bbox'][2],resDict['bbox'][1]+resDict['bbox'][3]]
                    write_txt(dataset, f, resDict['bbox'], resDict['label'], resDict['probs']-0.1)
                    index += 1 

        else:
            print('track locally')
            for i in range(len(multiProcess)):
                multiProcess[i].track(frame)

            t = time.time()
            for i in range(len(multiProcess)):
                resDict = resQueue.get()
                resDict['bbox'] = [resDict['bbox'][0],resDict['bbox'][1],resDict['bbox'][0]+resDict['bbox'][2],resDict['bbox'][1]+resDict['bbox'][3]]
                write_txt(dataset, f, resDict['bbox'], resDict['label'], resDict['probs']-0.1)

            print(time.time() - t)
            index += 1
            
    map_compute()
Example #18
def main():
    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    checkpoint = torch.load(args.snapshot)
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    #multiprocessing
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []
    label = []
    probs = []

    for i in range(10):
        multiProcess.append(build_multitracker(model, label, probs, resQueue))
        multiProcess[i].start()

    first_frame = True
    filename = "./demo/video.avi"
    camera = cv2.VideoCapture(filename)

    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', '2')
    demo_out = cv2.VideoWriter('./demo/output/video.avi', fourcc, 25,
                               (640, 368))

    while camera.isOpened():
        ret, frame = camera.read()
        if ret:
            print(frame.shape)

            if first_frame:
                # keyframe need to be uploaded to cloud
                print('first frame')
                key_frame = frame

                # communication
                payload = {"type": "data", "contents": {"frame": frame}}
                print("####################", payload)
                ecci_client.publish(payload, "cloud")

                cloud_data = ecci_client.get_sub_data_payload_queue().get()
                print("###########recieve data from cloud", cloud_data)
                bbox = cloud_data["bbox"]
                label = cloud_data["label"]
                probs = cloud_data["probs"]
                num_process = len(bbox)

                t_detect_start = time.time()
                for i in range(num_process):
                    cv2.rectangle(frame, (int(bbox[i][0]), int(bbox[i][1])),
                                  (int(bbox[i][2]), int(bbox[i][3])),
                                  (0, 0, 255), 3)
                    cv2.putText(frame,
                                '%s: %.3f' % (label[i], probs[i]),
                                (bbox[i][0], bbox[i][1] - 15),
                                cv2.FONT_HERSHEY_PLAIN,
                                1.0, (0, 0, 255),
                                thickness=1)
                    init_rect = [
                        bbox[i][0], bbox[i][1], bbox[i][2] - bbox[i][0],
                        bbox[i][3] - bbox[i][1]
                    ]
                    multiProcess[i].init(frame, init_rect, label[i], probs[i])
                t_detect_end = time.time()
                print("detect fps : ", 1 / (t_detect_end - t_detect_start))

                first_frame = False
                index = 1

            elif index % 5 == 0:
                if is_key(key_frame, frame) or index % 10 == 0:
                    # keyframe need to be uploaded to cloud ##### outputs, time ######
                    print('key frame')
                    key_frame = frame

                    # communication
                    payload = {"type": "data", "contents": {"frame": frame}}
                    print("####################", payload)
                    ecci_client.publish(payload, "cloud")

                    cloud_data = ecci_client.get_sub_data_payload_queue().get()
                    print("###########recieve data from cloud", cloud_data)
                    bbox = cloud_data["bbox"]
                    label = cloud_data["label"]
                    probs = cloud_data["probs"]
                    num_process = len(bbox)

                    t_detect_start = time.time()
                    for i in range(num_process):
                        cv2.rectangle(frame,
                                      (int(bbox[i][0]), int(bbox[i][1])),
                                      (int(bbox[i][2]), int(bbox[i][3])),
                                      (0, 0, 255), 3)
                        cv2.putText(frame,
                                    '%s: %.3f' % (label[i], probs[i]),
                                    (bbox[i][0], bbox[i][1] - 15),
                                    cv2.FONT_HERSHEY_PLAIN,
                                    1.0, (0, 0, 255),
                                    thickness=1)
                        init_rect = [
                            bbox[i][0], bbox[i][1], bbox[i][2] - bbox[i][0],
                            bbox[i][3] - bbox[i][1]
                        ]
                        multiProcess[i].init(frame, init_rect, label[i],
                                             probs[i])
                    t_detect_end = time.time()
                    print("detect fps : ", 1 / (t_detect_end - t_detect_start))

                    index = 1
                else:
                    print('non-key frame')
                    t_track_start = time.time()
                    for i in range(num_process):
                        multiProcess[i].track(frame)
                    t_track_end = time.time()
                    print("track fps : ", 1 / (t_track_end - t_track_start))

                    for i in range(num_process):
                        resDict = resQueue.get()
                        print(resDict)
                        bbox = list(map(int, resDict['bbox']))
                        cv2.rectangle(frame, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 0), 3)
                        cv2.putText(frame,
                                    '%s: %.3f' % (label[i], probs[i]),
                                    (bbox[0], bbox[1] - 15),
                                    cv2.FONT_HERSHEY_PLAIN,
                                    1.0, (0, 255, 0),
                                    thickness=1)
                    index += 1

            else:
                print('non-key frame')
                t_track_start = time.time()
                for i in range(num_process):
                    multiProcess[i].track(frame)
                t_track_end = time.time()
                print("track fps : ", 1 / (t_track_end - t_track_start))

                for i in range(num_process):
                    resDict = resQueue.get()
                    print(resDict)
                    bbox = list(map(int, resDict['bbox']))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.putText(frame,
                                '%s: %.3f' % (label[i], probs[i]),
                                (bbox[0], bbox[1] - 15),
                                cv2.FONT_HERSHEY_PLAIN,
                                1.0, (0, 255, 0),
                                thickness=1)
                index += 1

            print("writing")
            demo_out.write(frame)

        else:
            break

    camera.release()
    demo_out.release()
    cv2.destroyAllWindows()
Example #19
def ObjectTracking():

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    # parameters init
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    cnt_ = 0
    pre_frame = 0
    pre_rect = 0
    points = []

    # main loop for getting frame and tracking object
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                # to select object
                init_rect = cv2.selectROI(video_name, frame, False, False)
                pre_rect = init_rect
            except:
                exit()

            # init model
            tracker.init(frame, init_rect)
            first_frame = False

        else:
            cnt_ += 1

            # make prediction
            outputs = tracker.track(frame)

            if outputs['best_score'] > 0.6:

                pre_frame = frame

                if 'polygon' in outputs:

                    polygon = np.array(outputs['polygon']).astype(np.int32)
                    cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                                  (0, 255, 0), 3)
                    #make mask
                    mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                    mask = mask.astype(np.uint8)
                    mask = np.stack([mask, mask * 255,
                                     mask]).transpose(1, 2, 0)
                    frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)

                else:
                    bbox = list(map(int, outputs['bbox']))
                    pre_rect = bbox
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)
            else:
                # re-init model using previous location
                tracker.init(pre_frame, pre_rect)

            # draw moving path
            if args.draw_moving_path and len(polygon) != 0:
                points = draw_moving_path(polygon, points, frame)

            ## save frame as JPEG file
            if args.saveframe and os.path.isdir(args.framepath):
                fullpath = args.framepath + "frame{0:0>3}.jpg".format(cnt_)
                cv2.imwrite(fullpath, frame)

            cv2.imshow(video_name, frame)

            # may need to adjust based on your hardware
            cv2.waitKey(20)
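Example #19 calls a draw_moving_path helper that is not listed here. A plausible sketch, assuming the path is simply the sequence of polygon centroids drawn as a polyline (the signature matches the call above, but the body is a guess rather than the author's implementation):

def draw_moving_path(polygon, points, frame, max_points=100):
    # append the current polygon centroid to the path
    center = polygon.reshape(-1, 2).mean(axis=0)
    points.append((int(center[0]), int(center[1])))
    points = points[-max_points:]  # keep the path bounded
    # draw the path as connected line segments
    for p1, p2 in zip(points[:-1], points[1:]):
        cv2.line(frame, p1, p2, (0, 0, 255), 2)
    return points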
Example #20
0
def main():
    # instantiate iiwa
    iiwa = iiwaRobot()
    time.sleep(4)  # give iiwa some time to wake up
    # zero joints
    iiwa.move_joint(commit=True)
    # iiwa get ready
    iiwa.move_joint(JOINT_PERCH, commit=True)
    time.sleep(4)
    rospy.loginfo("iiwa is ready")
    # read TCP orientation
    QUAT = Quaternion()
    QUAT.x = iiwa.cartesian_pose.orientation.x
    QUAT.y = iiwa.cartesian_pose.orientation.y
    QUAT.z = iiwa.cartesian_pose.orientation.z
    QUAT.w = iiwa.cartesian_pose.orientation.w
    # Configure realsense D435 depth and color streams
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    profile = pipeline.start(config)
    # Create an align object
    align_to = rs.stream.color
    align = rs.align(align_to)
    # load siammask config
    cfg.merge_from_file(sys.path[0]+"/siammask_r50_l3/config.yaml")
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    # create model
    model = ModelBuilder()
    # load model
    model.load_state_dict(torch.load(sys.path[0]+"/siammask_r50_l3/model.pth",
        map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)
    # build tracker
    tracker = build_tracker(model)
    # label object
    video_name = 'D435_color'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    first_frame = True
    while True:
        # wait for the image stream and select the object of interest
        frames = pipeline.wait_for_frames()
        # Align the depth frame to color frame
        aligned_frames = align.process(frames)
        color_frame = aligned_frames.get_color_frame()
        depth_frame = aligned_frames.get_depth_frame()
        depth_intrinsics = rs.video_stream_profile(depth_frame.profile).get_intrinsics()
        # skip incomplete frame sets, then convert images to numpy arrays
        if not color_frame or not depth_frame:
            continue
        color_image = np.asanyarray(color_frame.get_data())
        depth_image = np.asanyarray(depth_frame.get_data())
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, color_image, False, False)
            except:
                exit()
            tracker.init(color_image, init_rect)
            first_frame = False
        else:
            # start tracking
            outputs = tracker.track(color_image)
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(color_image, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask*255, mask]).transpose(1, 2, 0)
            color_image = cv2.addWeighted(color_image, 0.77, mask, 0.23, -1)
            bbox = list(map(int, outputs['bbox']))
            poi_pixel = [int(bbox[0]+0.5*bbox[2]), int(bbox[1]+0.5*bbox[3])]
            poi_depth = depth_frame.get_distance(poi_pixel[0], poi_pixel[1])
            poi_rs = rs.rs2_deproject_pixel_to_point(depth_intrinsics, poi_pixel, poi_depth)
            print("Object 3D position w.r.t. camera frame: {}".format(poi_rs))
            if not np.allclose(poi_rs, np.zeros(3)):
                # compute transformed position of poi w.r.t. iiwa_link_0
                transform = iiwa.tf_listener.getLatestCommonTime('/iiwa_link_0', '/rs_d435')
                pos_rs = PoseStamped()
                pos_rs.header.frame_id = 'rs_d435'
                pos_rs.pose.orientation.w = 1.
                pos_rs.pose.position.x = poi_rs[0]
                pos_rs.pose.position.y = poi_rs[1]
                pos_rs.pose.position.z = poi_rs[2]
                pos_iiwa = iiwa.tf_listener.transformPose('/iiwa_link_0', pos_rs)
                rospy.loginfo("Object 3D position w.r.t. iiwa base frame: {}\n ee w.r.t. iiwa base: {}".format(pos_iiwa.pose.position, iiwa.cartesian_pose.position))
                # set cartesian goal
                iiwa.goal_carte_pose.header.frame_id = 'iiwa_link_0'
                iiwa.goal_carte_pose.pose.position.x = X
                iiwa.goal_carte_pose.pose.position.y = np.clip(pos_iiwa.pose.position.y, Y_MIN, Y_MAX)
                iiwa.goal_carte_pose.pose.position.z = np.clip(pos_iiwa.pose.position.z, Z_MIN, Z_MAX)
                iiwa.goal_carte_pose.pose.orientation = QUAT
                iiwa.move_cartesian(cartesian_pose=iiwa.goal_carte_pose)

        # display image stream, press 'ESC' or 'q' to terminate
        cv2.imshow(video_name, color_image)
        key = cv2.waitKey(40)
        if key in (27, ord("q")):
            break

    time.sleep(4)
    iiwa.move_joint(joint_position=JOINT_PERCH)
    time.sleep(4)
    pipeline.stop()
    rospy.loginfo("Finished")
Example #21
0
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load(args.snapshot, map_location=device))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    last_frame = None
    last_bbox = None

    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            bbox = init_rect
            print(init_rect)
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)

            score = outputs["best_score"]
            if score < 0.95:
                # re-initialize from the last confident frame, if one exists
                if last_frame is not None:
                    tracker.init(last_frame, last_bbox)
                continue

            print(outputs["best_score"])
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                              (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)

            last_frame = frame
            last_bbox = bbox

            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
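Several of these examples iterate over get_frames(args.video_name) without showing it. A minimal version along the lines of the standard pysot demo helper (webcam when no name is given, a video file for .avi/.mp4, otherwise a directory of images) could be:

import glob
import os

def get_frames(video_name):
    if not video_name:
        cap = cv2.VideoCapture(0)  # default webcam
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
    elif video_name.endswith(('.avi', '.mp4')):
        cap = cv2.VideoCapture(video_name)
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
    else:
        for img_path in sorted(glob.glob(os.path.join(video_name, '*.jp*'))):
            yield cv2.imread(img_path)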
Example #22
0
def main():

    torch.cuda.set_device(args.gpu_id)

    model_dir = "./experiments/siamrpn_r50_l234_dwxcorr/model.pth"
    model_config = "./experiments/siamrpn_r50_l234_dwxcorr/config.yaml"

    if os.path.isfile(model_dir):
        print("model file {} found".format(model_dir))
    else:
        print("model file {} not found, starting download".format(model_dir))
        os.system(
            "gdown https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH")
        os.system("mv model.pth ./experiments/siamrpn_r50_l234_dwxcorr")

    # load config
    cfg.merge_from_file(model_config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load(model_dir,
                                     map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # create a unique identifier
    worker_id = uuid.uuid4()

    # build tracker
    tracker = build_tracker(model)

    # Socket to talk to server
    context = zmq.Context()
    sub_socket = context.socket(zmq.SUB)

    # set up frame listening socket
    sub_socket.connect("tcp://{}:5556".format(args.server_ip))
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, "frame_")
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, str(worker_id))

    # setup push socket
    context = zmq.Context()
    push_socket = context.socket(zmq.PUSH)
    push_socket.connect("tcp://{}:5557".format(args.server_ip))

    # event monitoring
    # used to register worker once connection is established
    EVENT_MAP = {}
    for name in dir(zmq):
        if name.startswith('EVENT_'):
            value = getattr(zmq, name)
            EVENT_MAP[value] = name

    # monitor thread function
    def event_monitor(monitor):
        while monitor.poll():
            evt = recv_monitor_message(monitor)
            evt.update({'description': EVENT_MAP[evt['event']]})
            if evt['event'] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
                push_socket.send_json(
                    {"type": "REGISTER", "id": str(worker_id)})
            if evt['event'] == zmq.EVENT_MONITOR_STOPPED:
                break
        monitor.close()

    # register monitor
    monitor = sub_socket.get_monitor_socket()

    t = threading.Thread(target=event_monitor, args=(monitor,))
    t.start()

    support = None

    try:
        while True:
            # wait for next message
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == 'FRAME':
                msg = sub_socket.recv()
                buf = memoryview(msg)
                frame = np.frombuffer(
                    buf, dtype=md['dtype']).reshape(md['shape'])

                if support is None:
                    continue

                outputs = tracker.track(frame)
                bbox = list(map(int, outputs['bbox']))

                # send result
                push_socket.send_json(
                    {
                        "type": "TRACK",
                        "bbox": bbox,
                        "score": outputs['best_score'].tolist(),
                        "time": md['time'],
                        "id": str(worker_id)
                    })
                print('message: {}'.format(md['time']), end='\r')
            elif md['type'] == 'SUPPORT':
                frame_raw = md['data']['img']  # base 64 png image
                frame = np.array(
                    Image.open(
                        io.BytesIO(
                            base64.b64decode(frame_raw)
                        )
                    ).convert('RGB'))[:, :, ::-1]
                bbox = [int(float(i)) for i in md['data']['bbox'].split(",")]
                tracker.init(frame, bbox)
                support = (frame, bbox)
                print('Support received, tracking will now start')
            elif md['type'] == 'LOCATION':
                # make sure the tracker has been initialized
                if support is not None:
                    center_pos = np.array(md['data'])
                    tracker.update(center_pos)
            elif md['type'] == 'PING':
                push_socket.send_json({"type": "PONG", "id": str(worker_id)})
            else:
                print('Invalid message type received: {}'.format(md['type']))
    except KeyboardInterrupt:
        print('Exiting... notifying server of disconnect')
        push_socket.send_json(
            {"type": "FIN", "id": str(worker_id)})
        # wait for the server to respond or let the user forcefully close
        print("Waiting for server response. Press CTRL+C again to forcefully close")
        while True:
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == "FIN":
                print('Server responded, now exiting')
                exit(0)
            elif md['type'] == "FRAME":
                # we have to accept the incoming frame to properly accept future messages
                msg = sub_socket.recv()
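The worker above reads each FRAME message as three parts: a topic, a JSON metadata dict, and the raw frame bytes. The matching publisher is not shown; a sketch of what the server side might send for a broadcast frame (the port and topic string mirror the worker, everything else is an assumption):

import time

import numpy as np
import zmq

context = zmq.Context()
pub_socket = context.socket(zmq.PUB)
pub_socket.bind("tcp://*:5556")

def publish_frame(frame):
    # one multipart message: topic, JSON metadata, raw pixel buffer
    md = {"type": "FRAME", "dtype": str(frame.dtype),
          "shape": frame.shape, "time": time.time()}
    pub_socket.send_string("frame_", zmq.SNDMORE)
    pub_socket.send_json(md, zmq.SNDMORE)
    pub_socket.send(frame.tobytes())

SUPPORT, LOCATION and PING messages would presumably be published the same way, addressed to str(worker_id) as the topic.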
Example #23
0
def main():
  cfg.merge_from_file("experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml")
  device = torch.device("cuda")

  model = ModelBuilder()

  model.load_state_dict(
    torch.load("experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth",
      map_location=lambda storage, loc: storage.cpu()))
  model.eval().to(device)

  tracker = build_tracker(model)

  frame = cv2.imread("image/benchmark_5.jpg")
  bbox = (280, 80, 200, 270)
  
  """
  This part is inherited from the tracker.init method, trying to serialize tracker.template
  """
  # tracker.init(frame, bbox)
  center_pos = np.array([bbox[0]+(bbox[2]-1)/2, bbox[1]+(bbox[3]-1)/2])
  size = np.array([bbox[2], bbox[3]])

  w_z = size[0] + 0.5 * np.sum(size)
  h_z = size[1] + 0.5 * np.sum(size)
  s_z = round(np.sqrt(w_z * h_z))
  s_x = 255 / 127 * s_z

  # calculate channel average
  channel_average = np.mean(frame, axis=(0, 1))

  z_crop = tracker.get_subwindow(frame, center_pos, 127, s_z, channel_average)
  x_crop = tracker.get_subwindow(frame, center_pos, 255, s_x, channel_average)
  #############################################################################
  # a new script model inherited from template
  class ArcTemplate(torch.nn.Module):
    def __init__(self):
      super(ArcTemplate, self).__init__()

      self.backbone = get_backbone(cfg.BACKBONE.TYPE,
        **cfg.BACKBONE.KWARGS)

      self.neck = get_neck(cfg.ADJUST.TYPE,
        **cfg.ADJUST.KWARGS)
      
      self.rpn_head = get_rpn_head(cfg.RPN.TYPE,
        **cfg.RPN.KWARGS)

    def forward(self, z_crop):
      return self.neck(self.backbone(z_crop))

  arc = ArcTemplate()
  arc.load_state_dict(
    torch.load("experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth",
      map_location=lambda storage, loc: storage.cpu()))
  arc.eval().to(device)

  zrf = arc(z_crop)

  for z in zrf:
    print(z.shape)

  torch.jit.trace(arc, z_crop).save("archine.pt")

  #############################################################################
  # a new script model inherited from track
  class FrostTemplate(torch.nn.Module):
    # the frost model is the combination of the backbone/neck/rpn_head networks
    def __init__(self):
      super(FrostTemplate, self).__init__()

      self.backbone = get_backbone(cfg.BACKBONE.TYPE,
        **cfg.BACKBONE.KWARGS)

      self.neck = get_neck(cfg.ADJUST.TYPE,
        **cfg.ADJUST.KWARGS)

      self.rpn_head = get_rpn_head(cfg.RPN.TYPE,
        **cfg.RPN.KWARGS)

    def forward(self, z, x):
      zf = self.neck(self.backbone(z))
      xf = self.neck(self.backbone(x))
      print("zf shape:\n", zf[0].shape, "\t", zf[1].shape, "\t", zf[2].shape)
      print("xf shape:\n", xf[0].shape, "\t", xf[1].shape, "\t", xf[2].shape)

      return self.rpn_head(zf, xf)


  fro = FrostTemplate()
  fro.load_state_dict(
    torch.load("experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth",
      map_location=lambda storage, loc: storage.cpu()))
  fro.eval().to(device)

  res = fro(z_crop, x_crop)
  torch.jit.trace(fro, (z_crop, x_crop)).save("frost.pt")
  #############################################################################
  # the rpn head model
  class HeadTemplate(torch.nn.Module):
    def __init__(self):
      super(HeadTemplate, self).__init__()

      self.backbone = get_backbone(cfg.BACKBONE.TYPE,
        **cfg.BACKBONE.KWARGS)

      self.neck = get_neck(cfg.ADJUST.TYPE,
        **cfg.ADJUST.KWARGS)

      self.rpn_head = get_rpn_head(cfg.RPN.TYPE,
        **cfg.RPN.KWARGS)

    def forward(self, z, x):
      return self.rpn_head(z, x)
  

  hed = HeadTemplate()
  hed.load_state_dict(
    torch.load("experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth",
      map_location=lambda storage, loc: storage.cpu()))
  hed.eval().to(device)

  zeta = hed(zrf, zrf)
  # the model works; however, we do not yet know how to save it as a TorchScript module
  #############################################################################
  # visualization
  cv2.rectangle(frame, (280, 80), (480, 350), (0, 0, 255), 2)
  cv2.imshow("_", frame)
  cv2.waitKey(0)
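One way to sanity-check the traced modules above (a sketch, not part of the original script) is to reload the saved TorchScript file and compare its outputs with the eager module, assuming the RPN head returns a (cls, loc) pair of tensors as in the stock pysot heads:

  traced_fro = torch.jit.load("frost.pt")
  with torch.no_grad():
    eager_out = fro(z_crop, x_crop)
    traced_out = traced_fro(z_crop, x_crop)
  for eager_t, traced_t in zip(eager_out, traced_out):
    # both runs should agree up to numerical tolerance
    print(torch.allclose(eager_t, traced_t, atol=1e-5))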
Example #24
0
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    cap = cv2.VideoCapture(args.video_name)
    body_detector = BodyDetector()
    body_detector.load_model(path_to_model=args.model_path)
    first_frame_with_detection = False
    updating_frame = False
    sot_trackers = {}

    counter = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        img_height, img_width, _ = np.shape(frame)
        regions, _, _ = body_detector.process(frame)

        if len(regions) > 0 and not first_frame_with_detection:
            first_frame_with_detection = True

        if first_frame_with_detection and not updating_frame:
            for r in regions:
                counter += 1
                sot_trackers[counter] = MSTracker(model=model,
                                                  tracker_id=counter)
                sot_trackers[counter].tracker_init(frame, r)

            updating_frame = True
            print(f"Init number of MSTracker: {counter}")
            continue

        if updating_frame:
            current_frame_sot_regions = []
            for tracker_id in sot_trackers.keys():
                sot_region, sot_score = sot_trackers[tracker_id].update(frame)
                if sot_score > 0.5:
                    current_frame_sot_regions.append(sot_region)

            current_detected_regions = regions

            # compare SOT regions and detected regions to decide whether to fire up a new MSTracker
            for d_region in current_detected_regions:
                new_tracker = True
                for sot_region in current_frame_sot_regions:
                    distance = math.sqrt((d_region.x - sot_region.x)**2 +
                                         (d_region.y - sot_region.y)**2)
                    if distance < 200:
                        new_tracker = False
                        break
                if new_tracker:
                    counter += 1
                    sot_trackers[counter] = MSTracker(model=model,
                                                      tracker_id=counter)
                    sot_trackers[counter].tracker_init(frame, d_region)
                    print(f"New Tracker: {counter}")

            # display
            # displayed = draw_regions(frame, current_frame_sot_regions, color=(0, 255, 0))
            # displayed = draw_regions(displayed, current_detected_regions)
            for r in current_frame_sot_regions:
                t_id = r.data["sot_id"]
                frame = write_into_region(frame,
                                          str(t_id),
                                          r,
                                          show_region_outline=True)
            cv2.putText(frame, f"MSTracker: {len(sot_trackers.keys())}",
                        (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0))
            cv2.imshow("body", frame)
            cv2.waitKey(1)
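MSTracker in Example #24 is a project-specific wrapper that is not listed. Below is a minimal sketch of what it might look like around pysot's build_tracker; the Region container, the tracker_init/update signatures and the score handling are assumptions made to match the calls above:

from dataclasses import dataclass, field

@dataclass
class Region:
    # hypothetical stand-in for the body detector's region type
    x: int
    y: int
    w: int
    h: int
    data: dict = field(default_factory=dict)

class MSTracker:
    def __init__(self, model, tracker_id):
        self.tracker_id = tracker_id
        self.tracker = build_tracker(model)

    def tracker_init(self, frame, region):
        self.tracker.init(frame, (region.x, region.y, region.w, region.h))

    def update(self, frame):
        outputs = self.tracker.track(frame)
        x, y, w, h = map(int, outputs['bbox'])
        region = Region(x=x, y=y, w=w, h=h,
                        data={"sot_id": self.tracker_id})
        return region, float(outputs['best_score'])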
Example #25
0
def main(args):
    seq_name = args.seq_name
    # the packages of trackers
    from pysot.core.config import cfg  # use the modified config file to reset the tracking system
    from pysot.models.model_builder import ModelBuilder
    # modified single tracker with wrapper
    from mot_zj.MUST_sot_builder import build_tracker
    from mot_zj.MUST_utils import draw_bboxes, find_candidate_detection, handle_conflicting_trackers, sort_trackers
    from mot_zj.MUST_ASSO.MUST_asso_model import AssociationModel
    from mot_zj.MUST_utils import traj_interpolate

    dataset_dir = os.path.join(root, 'result')
    seq_type = 'img'
    # set the paths of the config file and the model parameters
    config_path = os.path.join(track_dir, "mot_zj", "MUST_config_file",
                               "alex_config.yaml")
    model_params = os.path.join(params_dir, "alex_model.pth")
    # enable the visualisation or not
    is_visualisation = False
    # print the information of the tracking process or not
    is_print = True

    results_dir = os.path.join(dataset_dir, 'track')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    img_traj_dir = os.path.join(track_dir, "img_traj")
    if os.path.exists(os.path.join(img_traj_dir, seq_name)):
        shutil.rmtree(os.path.join(img_traj_dir, seq_name))

    seq_dir = os.path.join(dataset_dir, seq_type)
    seq_names = os.listdir(seq_dir)
    seq_num = len(seq_names)

    # record the processing time
    start_point = time.time()

    # load config
    # load the config information from other variables
    cfg.merge_from_file(config_path)

    # set the flag that CUDA is available
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create the tracker model
    track_model = ModelBuilder()
    # load tracker model
    track_model.load_state_dict(
        torch.load(model_params,
                   map_location=lambda storage, loc: storage.cpu()))
    track_model.eval().to(device)
    # create association model
    asso_model = AssociationModel(args)

    seq_det_path = os.path.join(seq_dir, seq_name, 'det')
    seq_img_path = os.path.join(seq_dir, seq_name, 'img1')

    # print path and dataset information
    if is_print:
        print('preparing for the sequence: {}'.format(seq_name))
        print('-----------------------------------------------')
        print("detection result path: {}".format(seq_det_path))
        print("image files path: {}".format(seq_img_path))
        print('-----------------------------------------------')

    # read the detection results
    det_results = np.loadtxt(os.path.join(seq_det_path, 'det.txt'),
                             dtype=float,
                             delimiter=',')

    # read images from each sequence
    images = sorted(glob.glob(os.path.join(seq_img_path, '*.jpg')))
    img_num = len(images)

    # the container of trackers
    trackers = []

    # visualisation settings
    if is_visualisation:
        cv2.namedWindow(seq_name, cv2.WINDOW_NORMAL)

    # init (reset) the identifier
    id_num = 0

    # tracking process in each frame
    for nn, im_path in enumerate(images):
        each_start = time.time()
        frame = nn + 1
        img = cv2.imread(im_path)
        print('Frame {} is loaded'.format(frame))

        # load the detection results of this frame
        pre_frame_det_results = det_results[det_results[:, 0] == frame]

        # non-maximum suppression [frame, id, x, y, w, h, score]
        indices = nms.boxes(pre_frame_det_results[:, 2:6],
                            pre_frame_det_results[:, 6])
        frame_det_results = pre_frame_det_results[indices, :]

        # extract the bbox [fr, id, (x, y, w, h), score]
        bboxes = frame_det_results[:, 2:6]

        ############################################
        # ***multiple tracking and associating***  #
        ############################################

        # 1. sort trackers
        index1, index2 = sort_trackers(trackers)

        # 2. save the processed index of trackers
        index_processed = []
        track_time = 0
        asso_time = 0
        for k in range(2):
            # process trackers in the first or the second class
            if k == 0:
                index_track = index1
            else:
                index_track = index2
            track_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.TRACKED or trackers[
                        ind].track_state == cfg.STATE.ACTIVATED:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_track_bboxes = bboxes[
                        indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track(tracking process)
                    trackers[ind].track(img, to_track_bboxes, frame)
                    # if the tracker keeps its previous tracking state (tracked or activated)
                    if trackers[
                            ind].track_state == cfg.STATE.TRACKED or trackers[
                                ind].track_state == cfg.STATE.ACTIVATED:
                        index_processed.append(ind)
            track_time += time.time() - track_start
            asso_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.LOST:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_associate_bboxes = bboxes[
                        indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track(association process)
                    trackers[ind].track(img, to_associate_bboxes, frame)
                    # add process flag
                    index_processed.append(ind)
            asso_time += time.time() - asso_start
        ############################################
        #        ***init new trackers ***          #
        ############################################

        # find the candidate bboxes to init new trackers
        indices = find_candidate_detection(trackers, bboxes)

        # process the tracker: init (1st frame) and track method (the other frames)
        for index in indices:
            id_num += 1
            new_tracker = build_tracker(track_model)
            new_tracker.init(img, bboxes[index, :], id_num, frame, seq_name,
                             asso_model)
            trackers.append(new_tracker)

        # resolve conflicts between trackers
        trackers = handle_conflicting_trackers(trackers, bboxes)

        # interpolate the tracklet results
        for tracker in trackers:
            if tracker.track_state == cfg.STATE.TRACKED or tracker.track_state == cfg.STATE.ACTIVATED:
                bbox = tracker.tracking_bboxes[-1, :]
                traj_interpolate(tracker, bbox, tracker.frames[-1], 30)

        ############################################
        #    ***collect tracking results***        #
        ############################################

        # collect the tracking results (all results, before filtering)
        if frame == len(images):
            results_bboxes = np.array([])
            for tracker in trackers:
                if results_bboxes.size == 0:
                    results_bboxes = tracker.results_return()
                else:
                    res = tracker.results_return()
                    if not res.size == 0:
                        results_bboxes = np.concatenate(
                            (results_bboxes, tracker.results_return()), axis=0)
            # test code segment
            filename = '{}.txt'.format(seq_name)
            results_bboxes = results_bboxes[np.argsort(results_bboxes[:, 0])]
            print(results_bboxes.shape[0])
            # detections filter
            indices = []
            if seq_name == 'b1':
                for ind, result in enumerate(results_bboxes):
                    if result[3] > 540:
                        if result[4] * result[5] < 10000:
                            indices.append(ind)
                results_bboxes = np.delete(results_bboxes, indices, axis=0)
            np.savetxt(os.path.join(results_dir, filename),
                       results_bboxes,
                       fmt='%d,%d,%.1f,%.1f,%.1f,%.1f')
        ############################################
        #        ***crop tracklet image***         #
        ############################################

        for tracker in trackers:
            if tracker.track_state == cfg.STATE.START or tracker.track_state == cfg.STATE.TRACKED or tracker.track_state == cfg.STATE.ACTIVATED:
                bbox = tracker.tracking_bboxes[-1, :]
                x1 = int(np.floor(np.maximum(1, bbox[0])))
                y1 = int(np.ceil(np.maximum(1, bbox[1])))
                x2 = int(np.ceil(np.minimum(img.shape[1], bbox[0] + bbox[2])))
                y2 = int(np.ceil(np.minimum(img.shape[0], bbox[1] + bbox[3])))
                img_traj = img[y1:y2, x1:x2, :]
                traj_path = os.path.join(img_traj_dir, seq_name,
                                         str(tracker.id_num))
                if not os.path.exists(traj_path):
                    os.makedirs(traj_path)
                tracklet_img_path = os.path.join(traj_path,
                                                 str(tracker.frames[-1]))
                cv2.imwrite("{}.jpg".format(tracklet_img_path), img_traj)
        each_time = time.time() - each_start
        print("period: {}s, track: {}s({:.2f}), asso: {}s({:.2f})".format(
            each_time, track_time, (track_time / each_time) * 100, asso_time,
            (asso_time / each_time) * 100))
        if is_visualisation:
            ##########################################
            # information print and visualisation    #
            ##########################################
            # print("The number of new trackers: {}".format(len(indices)))
            active_trackers = [
                trackers[i].id_num for i in range(len(trackers))
                if trackers[i].track_state == cfg.STATE.ACTIVATED
                or trackers[i].track_state == cfg.STATE.TRACKED
                or trackers[i].track_state == cfg.STATE.LOST
            ]
            print("The number of active trackers: {}".format(
                len(active_trackers)))
            print(active_trackers)
            anno_img = draw_bboxes(img, bboxes)
            cv2.imshow(seq_name, anno_img)
            cv2.waitKey(1)
        print("The running time is: {} s".format(time.time() - start_point))

    print("The total processing time is: {} s".format(time.time() -
                                                      start_point))
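find_candidate_detection, imported from mot_zj.MUST_utils, is used above to pick detections not already claimed by an active tracker. Its real implementation is not shown; a rough IoU-based sketch of such a gating function (the threshold and the exact rule are assumptions):

def bbox_iou(box_a, box_b):
    # boxes are (x, y, w, h)
    ax2, ay2 = box_a[0] + box_a[2], box_a[1] + box_a[3]
    bx2, by2 = box_b[0] + box_b[2], box_b[1] + box_b[3]
    iw = max(0.0, min(ax2, bx2) - max(box_a[0], box_b[0]))
    ih = max(0.0, min(ay2, by2) - max(box_a[1], box_b[1]))
    inter = iw * ih
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
    return inter / union if union > 0 else 0.0

def find_candidate_detection(trackers, bboxes, iou_thresh=0.5):
    # return indices of detections that overlap no tracker's latest bbox
    if bboxes.size == 0:
        return []
    claimed = [t.tracking_bboxes[-1, :] for t in trackers
               if getattr(t, 'tracking_bboxes', None) is not None
               and len(t.tracking_bboxes) > 0]
    return [i for i, det in enumerate(bboxes)
            if all(bbox_iou(det, tb) < iou_thresh for tb in claimed)]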
Example #26
0
class SOTTracker:
    def __init__(self, config_file, model_file):
        self.config_file = config_file
        self.model_file = model_file

        # load config
        cfg.merge_from_file(self.config_file)
        cfg.CUDA = torch.cuda.is_available()
        self.device = torch.device('cuda' if cfg.CUDA else 'cpu')

        # load model
        self.model = ModelBuilder()
        self.model.load_state_dict(
            torch.load(model_file,
                       map_location=lambda storage, loc: storage.cpu()))
        self.model.eval().to(self.device)

        # build tracker
        self.tracker = build_tracker(self.model)

    def tracking(self, init_img, init_bbox, imglist_to_track):
        # init tracker
        init_frame = cv2.imread(init_img)
        height, width, channels = init_frame.shape
        # convert bbox from relative coordinates to actual values
        init_bbox_coord = [
            int(init_bbox[0] * width),
            int(init_bbox[1] * height),
            int(init_bbox[2] * width),
            int(init_bbox[3] * height)
        ]
        self.tracker.init(init_frame, init_bbox_coord)

        # do tracking
        results = {
            _: {
                'polygon': None,
                'mask': None,
                'bbox': None
            }
            for _ in imglist_to_track
        }
        for img in imglist_to_track:
            frame = cv2.imread(img)
            outputs = self.tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                results[img]['polygon'] = [polygon.reshape((-1, 1, 2))]
                results[img]['mask'] = outputs['mask']

            if 'bbox' in outputs:
                bbox = list(map(float, outputs['bbox']))
                results[img]['bbox'] = [
                    bbox[0] / width, bbox[1] / height, bbox[2] / width,
                    bbox[3] / height
                ]
        return results

    def tracking_json_query(self, query_json):
        query = json.loads(query_json)
        try:
            init_img = query['init_img']
            init_bbox = [float(_) for _ in query['init_bbox']]  # tracking() expects relative coordinates
            imglist_to_track = query['imglist_to_track']
            assert len(imglist_to_track) > 0
            return self.tracking(init_img, init_bbox, imglist_to_track)
        except KeyError:
            print('invalid query json')
            return None

    @staticmethod
    def result2json(results):
        json_string = json.dumps(results)
        return json_string

    @staticmethod
    def vis_tracking_result(img_file, result):
        vis_frame = cv2.imread(img_file)
        height, width, channels = vis_frame.shape
        if result['polygon'] is not None:
            cv2.polylines(vis_frame, result['polygon'], True, (0, 255, 0), 3)
            mask = ((result['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            vis_frame = cv2.addWeighted(vis_frame, 0.77, mask, 0.23, -1)
        elif result['bbox'] is not None:
            bbox = result['bbox']
            cv2.rectangle(vis_frame,
                          (int(bbox[0] * width), int(bbox[1] * height)),
                          (int((bbox[0] + bbox[2]) * width),
                           int((bbox[1] + bbox[3]) * height)), (0, 255, 0), 3)
        return vis_frame
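A possible way to drive the SOTTracker service class above, with made-up file paths and a relative (x, y, w, h) box purely for illustration:

if __name__ == '__main__':
    sot = SOTTracker(config_file='experiments/siamrpn_r50_l234_dwxcorr/config.yaml',
                     model_file='experiments/siamrpn_r50_l234_dwxcorr/model.pth')
    query = json.dumps({
        'init_img': 'frames/0001.jpg',
        'init_bbox': [0.4, 0.3, 0.2, 0.25],
        'imglist_to_track': ['frames/0002.jpg', 'frames/0003.jpg'],
    })
    results = sot.tracking_json_query(query)
    print(SOTTracker.result2json(results))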
Example #27
0
def main():
    #try:
        #os.remove("/home/developer/kashyap/pysot-master/*.csv")
    #except:
     #   pass
    # with open('./demo/groundtruth.csv', 'r') as f:
    #     reader = csv.reader(f)
    #     cords = list(reader)

    # load config

    cfg.merge_from_file('./experiments/siamrpn_alex_dwxcorr/config.yaml')
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    print(device)
    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load('./experiments/siamrpn_alex_dwxcorr/model.pth', map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)


    video_list = glob1("/home/developer/kashyap/pysot-master/demo/vids/", "*.mp4")
    for video_name in video_list:
        video_name_str = os.path.splitext(video_name)[0]
        df = pd.read_csv('./demo/vids/'+video_name_str+'.csv', delimiter=',', header=None)
        cords = [list(x) for x in df.values]
        object_counter = 0
        for cord in cords:
            object_counter = object_counter + 1
            first_frame = True
            # if video_name:#args.video_name:
            #     #video_name = args.video_name.split('/')[-1].split('.')[0]
            #     video_name = video_name.split('/')[-1].split('.')[0]
            # else:
            #     exit()
            frame_count = 1
            mylist = [[frame_count,object_counter,cord,video_name]]
            for frame in get_frames(video_name):#(args.video_name):
                if first_frame:
                    try:
                        init_rect = cord
                    except:
                        exit()
                    tracker.init(frame, init_rect)
                    first_frame = False
                else:
                    outputs = tracker.track(frame)

                    if 'polygon' in outputs:
                        exit()
                    else:
                        #crds = map(int,outputs['bbox'])
                        bbox = list(map(int,outputs['bbox']))
                        #cv2.rectangle(frame,(bbox[0],bbox[1]),(bbox[0]+bbox[2],bbox[1]+bbox[3]),(0,255,0),3)    
                        #for frame in get_frames(video_name):#(args.video_name):
                        frame_count = frame_count + 1   
                        mylist.append([frame_count,object_counter,bbox,video_name])

            with open('vid-'+str(video_name)+'-tracking-'+str(object_counter)+'-object-'+str(cord)+'.csv', 'w', newline='') as csvfile:
                writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
                writer.writerows(mylist)
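The CSV writing above stores one object's full trajectory per file. A slightly cleaner variant of the same idea (a sketch, keeping the same fields) writes a header row and one record per frame:

import csv

def save_track_csv(path, rows):
    # rows: [frame_count, object_id, bbox, video_name] records
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['frame', 'object_id', 'bbox', 'video'])
        writer.writerows(rows)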
Example #28
0
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'

    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    save = 'C:\\Users\\biosi\\pysot\\demo'
    video_names = 'vita2'

    os.makedirs('%s/%s' % (save, video_names), exist_ok=True)
    os.makedirs('%s/%s/video' % (save, video_names), exist_ok=True)
    os.makedirs('%s/%s/mask' % (save, video_names), exist_ok=True)
    os.makedirs('%s/%s/crop' % (save, video_names), exist_ok=True)

    filename = 'vita2_output.mp4'

    #image2video write
    out = cv2.VideoWriter(
        os.path.join('%s\\%s\\video\\%s' % (save, video_names, filename)),
        cv2.VideoWriter_fourcc(*'mp4v'), 29.97,
        (int(list(get_frames(args.video_name))[0].shape[1]),
         int(list(get_frames(args.video_name))[0].shape[0])))

    x1 = []
    x2 = []
    x3 = []
    x4 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []

    for idx, frame in enumerate(get_frames(args.video_name)):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_names, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                polygon = polygon.reshape((-1, 1, 2))
                cv2.polylines(frame, [polygon], True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                cv2.imwrite(
                    "%s\\%s\\mask\\%04d.png" %
                    (save, video_names, int(idx + 1)), mask)
                #frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                x = []
                y = []
                for i in range(4):
                    y.append(polygon[i, 0, 1])
                    x.append(polygon[i, 0, 0])

                x1.append(x[0])
                x2.append(x[1])
                x3.append(x[2])
                x4.append(x[3])

                y1.append(y[0])
                y2.append(y[1])
                y3.append(y[2])
                y4.append(y[3])

                x_max = x[0]
                x_min = x[0]
                y_max = y[0]
                y_min = y[0]
                if x[0] < 0:
                    x_max = 0
                    x_min = 0
                if y[0] < 0:
                    y_max = 0
                    y_min = 0

                for i in range(4):
                    x_max = max(x_max, x[i])
                    x_min = min(x_min, x[i])
                    y_max = max(y_max, y[i])
                    y_min = min(y_min, y[i])
                    if x_min < 0:
                        x_min = 0
                    if y_min < 0:
                        y_min = 0

                #x1.append(bbox[0])
                #y1.append(bbox[1])
                #x2.append(bbox[0] + bbox[2])
                #y2.append(bbox[1] + bbox[3])

                #crop frame
                #crop = frame[bbox[1]:bbox[1]+bbox[3],bbox[0]:bbox[0]+bbox[2],:]
                crop = mask[int(y_min):int(y_max), int(x_min):int(x_max), :]
                print(y_min, y_max, x_min, x_max)
                cv2.imwrite(
                    "%s\\%s\\crop\\%04d.png" %
                    (save, video_names, int(idx + 1)), crop)

                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))

                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)

            dataframe = pd.DataFrame({
                'y1': y1,
                'x1': x1,
                'y2': y2,
                'x2': x2,
                'y3': y3,
                'x3': x3,
                'y4': y4,
                'x4': x4
            })
            dataframe.to_csv(os.path.join(
                '%s\\%s\\crop\\%s' % (save, video_names, "vita2_output.csv")),
                             index=False)

            out.write(frame)
            cv2.imshow(video_names, frame)
            cv2.waitKey(40)
Example #29
0
color_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_mask_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_bbox_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_mask = np.zeros((1280, 720), dtype=np.uint8)
pysot_img = np.zeros((1280, 720, 3), dtype=np.uint8)
mask_rcnn_flag = 0
pysot_mask = np.zeros((1280, 720), dtype=np.uint8)
pysot_contour_img = np.zeros((1280, 720, 3), dtype=np.uint8)

cfg.merge_from_file('config.yaml')
cfg.CUDA = torch.cuda.is_available()
device = torch.device('cuda' if cfg.CUDA else 'cpu')
model_pysot = ModelBuilder()
tracker = build_tracker(model_pysot)
model_pysot.load_state_dict(
    torch.load('model.pth', map_location=lambda storage, loc: storage.cpu()))
model_pysot.eval().to(device)


def run_maskrcnn():
    global color_img
    global result_mask_img
    global result_bbox_img
    global result_mask
    global mask_rcnn_flag
    global inds_len
    while 1:
        mask_rcnn_flag = 1
        result = inference_detector(model, color_img)
        result_mask_img, result_bbox_img, result_mask = show_result(
            color_img, result, model.CLASSES)
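run_maskrcnn above relies on an mmdetection model (model), inference_detector and a show_result helper created elsewhere in the script. One plausible setup, using the older mmdetection API whose inference_detector(model, img) call matches the usage above (the config and checkpoint paths are placeholders, not taken from the original):

from mmdet.apis import init_detector, inference_detector

maskrcnn_config = 'configs/mask_rcnn_r50_fpn_1x.py'           # placeholder
maskrcnn_checkpoint = 'checkpoints/mask_rcnn_r50_fpn_1x.pth'  # placeholder
model = init_detector(maskrcnn_config, maskrcnn_checkpoint,
                      device='cuda:0' if torch.cuda.is_available() else 'cpu')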
Example #30
0
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    # cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    out = None
    init_string = args.init_rect
    init_rect = list(map(int, init_string.split(',')))
    print("initial rectangle selected as: ", init_rect)
    print("output video is: ", args.output_video)
    count = 0
    for frame in get_frames(args.video_name):
        count += 1
        #         if count < 100:
        #             continue
        if first_frame:
            frame_size = frame.shape
            print(frame_size)
            out = cv2.VideoWriter(args.output_video,
                                  cv2.VideoWriter_fourcc(*'DIVX'), 30,
                                  (frame_size[1], frame_size[0]))
            tracker.init(frame, init_rect)
            first_frame = False

        else:
            all_outputs = tracker.track(frame)
            for outputs in all_outputs['bbox']:
                #             if 'polygon' in outputs:
                #                 polygon = np.array(outputs['polygon']).astype(np.int32)
                #                 cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                #                               True, (0, 255, 0), 3)
                #                 mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                #                 mask = mask.astype(np.uint8)
                #                 mask = np.stack([mask, mask*255, mask]).transpose(1, 2, 0)
                #                 frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)

                bbox = list(map(int, outputs))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 2)
            cv2.imwrite("test.jpg", frame)
        out.write(frame)

    out.release()