def train(func_train_one_batch, param_dict, path, log_dir_path):
    dis_loss = []
    pos_gen_loss = []
    neg_gen_loss = []

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    p = Logger(log_dir_path, **param_dict)

    # load dataset
    if p.dataset == 'car196':
        data = Car196(root=path)
    else:
        print('DATASET is', p.dataset)
        data = CUB_200_2011(root=path)

    sampler = BalancedBatchSampler(data.train.label_to_indices,
                                   n_samples=p.n_samples,
                                   n_classes=p.n_classes)
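    # The sampler presumably yields balanced batches containing p.n_classes
    # classes with p.n_samples images per class (n_classes * n_samples images
    # per batch), which is what the triplet generation below relies on.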
    kwargs = {'num_workers': 6, 'pin_memory': True}

    train_loader = DataLoader(data.train, batch_sampler=sampler,
                              **kwargs)  # (5 * 98, 3, 224, 224)

    # train_iter = iter(train_loader)
    # batch = next(train_iter)
    # generate_random_triplets_from_batch(batch, p.n_samples, p.n_classes)

    test_loader = DataLoader(data.test, batch_size=p.batch_size)

    # construct the model
    model = ModifiedGoogLeNet(p.out_dim, p.normalize_hidden).to(device)
    model_pos_gen = Generator(p.out_dim, p.normalize_hidden).to(device)
    model_neg_gen = Generator(p.out_dim, p.normalize_output).to(device)

    model_dis = Discriminator(p.out_dim, p.out_dim).to(device)
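    # Model layout (as suggested by the names): a GoogLeNet-based embedder, two
    # generators that synthesize positive/negative embeddings, and a
    # discriminator that scores real vs. generated embeddings.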

    model_optimizer = optim.Adam(model.parameters(), lr=p.learning_rate)
    pos_gen_optimizer = optim.Adam(model_pos_gen.parameters(),
                                   lr=p.learning_rate)
    neg_gen_optimizer = optim.Adam(model_neg_gen.parameters(),
                                   lr=p.learning_rate)
    dis_optimizer = optim.Adam(model_dis.parameters(), lr=p.learning_rate)
    model_feat_optimizer = optim.Adam(model.parameters(), lr=p.learning_rate)
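    # NOTE: model_optimizer and model_feat_optimizer both wrap model.parameters();
    # presumably one is stepped for the metric loss and the other for the
    # feature/adversarial loss inside func_train_one_batch.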

    time_origin = time.time()
    best_nmi_1 = 0.
    best_f1_1 = 0.
    best_nmi_2 = 0.
    best_f1_2 = 0.

    for epoch in range(p.num_epochs):
        time_begin = time.time()
        epoch_loss_neg_gen = 0
        epoch_loss_pos_gen = 0
        epoch_loss_dis = 0
        total = 0
        for batch in tqdm(train_loader, desc='# {}'.format(epoch)):
            triplet_batch = generate_random_triplets_from_batch(
                batch, n_samples=p.n_samples, n_class=p.n_classes)
            loss_pos_gen, loss_neg_gen, loss_dis = func_train_one_batch(
                device, model, model_pos_gen, model_neg_gen, model_dis,
                model_optimizer, model_feat_optimizer, pos_gen_optimizer,
                neg_gen_optimizer, dis_optimizer, p, triplet_batch, epoch)
            '''
            loss_dis = func_train_one_batch(device, model, model_dis, model_pos_gen
                                            model_optimizer,
                                            dis_optimizer, p, triplet_batch)
            '''

            epoch_loss_neg_gen += loss_neg_gen
            epoch_loss_pos_gen += loss_pos_gen
            epoch_loss_dis += loss_dis
            total += triplet_batch[0].size(0)

        loss_average_neg_gen = epoch_loss_neg_gen / total
        loss_average_pos_gen = epoch_loss_pos_gen / total
        loss_average_dis = epoch_loss_dis / total

        dis_loss.append(loss_average_dis)
        pos_gen_loss.append(loss_average_pos_gen)
        neg_gen_loss.append(loss_average_neg_gen)

        nmi, f1 = evaluate(device,
                           model,
                           model_dis,
                           test_loader,
                           epoch,
                           n_classes=p.n_classes,
                           distance=p.distance_type,
                           normalize=p.normalize_output,
                           neg_gen_epoch=p.neg_gen_epoch)
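        # nmi and f1 are clustering metrics computed on the test embeddings
        # (typically Normalized Mutual Information and pairwise F1 in
        # metric-learning evaluation).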
        if nmi > best_nmi_1:
            best_nmi_1 = nmi
            best_f1_1 = f1
            torch.save(model, os.path.join(p.model_save_path, "model.pt"))
            torch.save(model_pos_gen,
                       os.path.join(p.model_save_path, "model_pos_gen.pt"))
            torch.save(model_neg_gen,
                       os.path.join(p.model_save_path, "model_neg_gen.pt"))
            torch.save(model_dis,
                       os.path.join(p.model_save_path, "model_dis.pt"))
        if f1 > best_f1_2:
            best_nmi_2 = nmi
            best_f1_2 = f1

        time_end = time.time()
        epoch_time = time_end - time_begin
        total_time = time_end - time_origin

        print("#", epoch)
        print("time: {} ({})".format(epoch_time, total_time))
        print("[train] loss NEG gen:", loss_average_neg_gen)
        print("[train] loss POS gen:", loss_average_pos_gen)
        print("[train] loss dis:", loss_average_dis)
        print("[test]  nmi:", nmi)
        print("[test]  f1:", f1)
        print("[test]  nmi:", best_nmi_1, "  f1:", best_f1_1, "for max nmi")
        print("[test]  nmi:", best_nmi_2, "  f1:", best_f1_2, "for max f1")
        print(p)

    plt.plot(dis_loss)
    plt.ylabel("dis_loss")
    plt.savefig(log_dir_path + '/dis_loss.png')
    plt.close()

    plt.plot(pos_gen_loss)
    plt.ylabel("pos_gen_loss")
    plt.savefig(log_dir_path + '/pos_gen_loss.png')
    plt.close()

    plt.plot(neg_gen_loss)
    plt.ylabel("neg_gen_loss")
    plt.savefig(log_dir_path + '/neg_gen_loss.png')
    plt.close()
# Example no. 2
def main(args):
    #! TCP server 
    MESSAGE = [None]
    t = Thread(target=tcpThread, args=(MESSAGE, event))
    t.start()


    #! read images from the camera
    from joints_detectors.Alphapose.video2d import DetectionLoader
    det_loader = DetectionLoader(size=args.viz_size, device=0)
   

    #! visualization
    from common.visualization import Sequencial_animation, RealtimePlot
    pose3d_animation = Sequencial_animation( skeleton=Skeleton(), i=8,
        size=args.viz_size, azim=np.array(70., dtype=np.float32))
    
    fig_angle = plt.figure()
    ax1 = fig_angle.add_subplot(411)
    ax2 = fig_angle.add_subplot(412)
    ax3 = fig_angle.add_subplot(413)
    ax4 = fig_angle.add_subplot(414)

    # ani1_r = RealtimePlot(fig_angle, ax1, "squatting","r")
    # ani1_y = RealtimePlot(fig_angle, ax1, "squatting","y")
    # ani2_r = RealtimePlot(fig_angle, ax2, "turning","r")
    # ani2_y = RealtimePlot(fig_angle, ax2, "turning","y")
    # ani3_r = RealtimePlot(fig_angle, ax3, "moving","r")
    # ani3_y = RealtimePlot(fig_angle, ax3, "moving","y")
    # ani4_r = RealtimePlot(fig_angle, ax4, "walking","r")
    # ani4_y = RealtimePlot(fig_angle, ax4, "walking","y")

    ani1_r = RealtimePlot(fig_angle, ax1, "LElbowRoll_filtered","r")
    ani1_y = RealtimePlot(fig_angle, ax1, "LElbowRoll","y")
    ani2_r = RealtimePlot(fig_angle, ax2, "LElbowYaw_filtered","r")
    ani2_y = RealtimePlot(fig_angle, ax2, "LElbowYaw","y")
    ani3_r = RealtimePlot(fig_angle, ax3, "LShoulderRoll_filtered","r")
    ani3_y = RealtimePlot(fig_angle, ax3, "LShoulderRoll","y")
    ani4_r = RealtimePlot(fig_angle, ax4, "LShoulderPitch_filtered","r")
    ani4_y = RealtimePlot(fig_angle, ax4, "LShoulderPitch","y")
    
    thismanager = get_current_fig_manager()
    thismanager.window.wm_geometry("+1000+0")
    
    plt.ion()   # continuously plot


    #! load 3d pose estimation model
    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=args.causal, 
        dropout=args.dropout, channels=args.channels, dense=args.dense)
    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    ckpt, time1 = ckpt_time(time0)
    print('-------------- load 3d pose estimation model spends {:.2f} seconds'.format(ckpt))


    #! load trained weights
    chk_filename = os.path.join(args.checkpoint, args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
    model_pos.load_state_dict(checkpoint['model_pos'])
    #  Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    ckpt, time2 = ckpt_time(time1)
    print('-------------- load trained weights for 3D model spends {:.2f} seconds'.format(ckpt))


    #! Initialize keypoint metadata and helpers
    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list([4, 5, 6, 11, 12, 13]), list([1, 2, 3, 14, 15, 16])
    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    kp_deque = deque(maxlen=9)
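    # sliding window of the 9 most recent 2D keypoint frames; it must be full
    # before the temporal 3D model is queried in the loop below.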
    compute_walking = Compute_walking() 


    #! loop over the frames (currently fake frames)
    ckpt, time3 = ckpt_time(time2)


    #! Prepare deques for the 8 arm joint angles
    q_LShoulderPitch, q_RShoulderPitch, q_LShoulderRoll, q_RShoulderRoll = [deque(maxlen=7) for i in range(4)]
    q_LElbowYaw, q_RElbowYaw, q_LElbowRoll, q_RElbowRoll = [deque(maxlen=7) for i in range(4)]
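    # 7-sample deques per joint angle, used below for median filtering of the
    # retargeted angles.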
    try:
        while not event.is_set():
            frame = -1
            while frame < 65501:  # stop before the counter exceeds what fits in the float16 message below
                ckpt, time3 = ckpt_time(time3)
                frame += 1
                print("------------------- frame ",frame, '-------- generate reconstruction 3D data spends {:.2f} seconds'.format(ckpt))


                #! get 2d key points
                kp = det_loader.update()
                if kp is None:
                    continue
        
                kp_deque.append(kp.numpy())    
                if len(kp_deque)<9:
                    continue
                

                #! estimate 3d pose 
                # normalize keypoints, assuming fixed camera parameters (w=1000, h=1002)
                input_keypoints = normalize_screen_coordinates(np.asarray(kp_deque)[..., :2], w=1000, h=1002)
                prediction = evaluate(input_keypoints, pad, model_pos, return_predictions=True)
                
                # rotate the camera perspective
                prediction = camera_to_world(prediction, R=rot, t=0)

                # We don't have the trajectory, but at least we can rebase the height
                prediction[:, :, 2] -= np.min(prediction[:, :, 2])
                # input_keypoints = image_coordinates(input_keypoints[..., :2], w=1000, h=1002)


                #! Visualize 3d
                prediction = prediction[8]
                pose3d_animation.call(prediction)


                #! Motion retargeting
                MHip, RHip, RKnee, RAnkle, LHip, LKnee, LAnkle, Waist, Neck, Face, Top, \
                    LShoulder, LElbow, LWrist, RShoulder, RElbow, RWrist = prediction
                """
                Top:10,         Face:9,      Neck:8         Waist: 7    MHip: 0 
                LShoulder: 11,  LElbow: 12,  LWrist: 13,    LHip: 4,    LKnee: 5,    LAnkle: 6
                RShoulder: 14,  RElbow: 15,  RWrist: 16,    RHip: 1,    RKnee: 2,    RAnkle: 3
                """                
                left_upperarm = LElbow - LShoulder
                right_upperarm = RElbow - RShoulder
                left_lowerarm = LWrist - LElbow
                right_lowerarm = RWrist - RElbow
                left_upperleg = LHip - LKnee
                left_lowerleg = LAnkle - LKnee
                right_upperleg = RHip - RKnee
                right_lowerleg = RAnkle - RKnee
                
                #### Remapping ####
                coord = compute_torso_coord(Neck, LHip, RHip)
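                # coord is presumably an orthonormal torso frame built from the
                # neck and hips; the joint rotations below are expressed in it
                # so they are largely invariant to the camera viewpoint.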

                LShoulderRoll, LShoulderPitch, left_upperarm_t = compute_shoulder_rotation(left_upperarm, coord)
                RShoulderRoll, RShoulderPitch, right_upperarm_t = compute_shoulder_rotation(right_upperarm, coord)

                LElbowYaw, LElbowRoll, left_upperarm_t, left_lowerarm_t = compute_elbow_rotation(left_upperarm, left_lowerarm, coord, -1)
                RElbowYaw, RElbowRoll, right_upperarm_t, right_lowerarm_t = compute_elbow_rotation(right_upperarm, right_lowerarm, coord, 1)

                Turning, turningangle = get_turning(coord[2])
                walking, walkingangle = compute_walking(LAnkle[1] - RAnkle[1])
                squatting, squattingangle_left, squattingangle_right = get_squatting(
                    left_upperleg, left_lowerleg, right_upperleg, right_lowerleg)
                moving, movingangle = get_moving(Top[0]-(RAnkle[0]+LAnkle[0])/2)
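                # These appear to be heuristic whole-body state detectors
                # (turning / walking / squatting / moving) derived from joint
                # geometry; the thresholds live in the respective helpers.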

                #### Filtering ####
                LShoulderPitch_n = filter_data(q_LShoulderPitch, LShoulderPitch, median_filter)
                LShoulderRoll_n = filter_data(q_LShoulderRoll,  LShoulderRoll, median_filter)
                RShoulderPitch_n = filter_data(q_RShoulderPitch, RShoulderPitch, median_filter)
                RShoulderRoll_n = filter_data(q_RShoulderRoll,  RShoulderRoll, median_filter)
                
                LElbowYaw_n = filter_data(q_LElbowYaw, LElbowYaw, median_filter)
                LElbowRoll_n = filter_data(q_LElbowRoll,  LElbowRoll, median_filter)
                RElbowYaw_n = filter_data(q_RElbowYaw, RElbowYaw, median_filter)
                RElbowRoll_n = filter_data(q_RElbowRoll,  RElbowRoll, median_filter)

                
                #! Plot angle
                # ani1_r.call(frame,math.degrees(LElbowRoll_n))
                # ani1_y.call(frame,math.degrees(LElbowRoll))
                # ani2_r.call(frame,math.degrees(LElbowYaw_n))
                # ani2_y.call(frame,math.degrees(LElbowYaw))
                # ani3_r.call(frame,math.degrees(LShoulderRoll_n))
                # ani3_y.call(frame,math.degrees(LShoulderRoll))
                # ani4_r.call(frame,math.degrees(LShoulderPitch_n))
                # ani4_y.call(frame,math.degrees(LShoulderPitch))

                # ani1_r.call(frame, LShoulderRoll_n)
                # ani2_r.call(frame, LShoulderPitch_n)
                # ani1_y.call(frame,   LShoulderRoll)
                # ani2_y.call(frame,   LShoulderPitch)

                # # ani1_y.call(frame,squattingangle)
                # ani1_r.call(frame, 180*squatting)

                # # ani2_y.call(frame,turningangle)
                # ani2_r.call(frame, 60*Turning)

                # # ani3_y.call(frame,3*180*movingangle)
                # ani3_r.call(frame, 60*moving)

                # # ani4_y.call(frame,walkingangle)
                # ani4_r.call(frame,180* walking)
                             

                #! send udp
                message = np.array((frame,
                                    LShoulderRoll_n, LShoulderPitch_n, RShoulderRoll_n, RShoulderPitch_n, 
                                    LElbowYaw_n, LElbowRoll_n, RElbowYaw_n, RElbowRoll_n, 
                                    Turning, squatting, walking, moving))
                MESSAGE[0] = message.astype(np.float16).tobytes()  # tobytes() replaces the deprecated tostring()
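                # The angles are packed as float16 to keep the payload small;
                # the background socket thread started in main() presumably
                # reads MESSAGE[0] and sends it out.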

                # print(sys.getsizeof(MESSAGE))
                # print(Turning, squatting, walking)
                # print(message)

    except KeyboardInterrupt:
        plt.ioff()
        cv2.destroyAllWindows()
        return
# Example no. 3
def main(args):
    detector_2d = get_detector_2d(args.detector_2d)

    assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'

    # 2D kpts loads or generate
    if not args.input_npz:
        video_name = args.viz_video
        keypoints = detector_2d(video_name)
    else:
        npz = np.load(args.input_npz)
        keypoints = npz['kpts']  # (N, 17, 2)

    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(
        keypoints_symmetry[1])
    joints_left, joints_right = list([4, 5, 6, 11, 12,
                                      13]), list([1, 2, 3, 14, 15, 16])

    # normalize keypoints, assuming fixed camera parameters (w=1000, h=1002)
    keypoints = normalize_screen_coordinates(keypoints[..., :2],
                                             w=1000,
                                             h=1002)

    model_pos = TemporalModel(17,
                              2,
                              17,
                              filter_widths=[3, 3, 3, 3, 3],
                              causal=args.causal,
                              dropout=args.dropout,
                              channels=args.channels,
                              dense=args.dense)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    ckpt, time1 = ckpt_time(time0)
    print('-------------- load data spends {:.2f} seconds'.format(ckpt))

    # load trained model
    chk_filename = os.path.join(args.checkpoint,
                                args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(
        chk_filename,
        map_location=lambda storage, loc: storage)  # map each tensor's storage to CPU
    model_pos.load_state_dict(checkpoint['model_pos'])

    ckpt, time2 = ckpt_time(time1)
    print('-------------- load 3D model spends {:.2f} seconds'.format(ckpt))

    #  Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Rendering...')
    input_keypoints = keypoints.copy()
    gen = UnchunkedGenerator(None,
                             None, [input_keypoints],
                             pad=pad,
                             causal_shift=causal_shift,
                             augment=args.test_time_augmentation,
                             kps_left=kps_left,
                             kps_right=kps_right,
                             joints_left=joints_left,
                             joints_right=joints_right)
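    # UnchunkedGenerator feeds the whole keypoint sequence to the temporal
    # model at once (with optional test-time flip augmentation), so no
    # chunking or striding of the video is needed.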
    prediction = evaluate(gen, model_pos, return_predictions=True)

    # save 3D joint points
    np.save('outputs/test_3d_output.npy', prediction, allow_pickle=True)

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804],
                   dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    anim_output = {'Reconstruction': prediction}
    input_keypoints = image_coordinates(input_keypoints[..., :2],
                                        w=1000,
                                        h=1002)

    ckpt, time3 = ckpt_time(time2)
    print(
        '-------------- generate reconstruction 3D data spends {:.2f} seconds'.
        format(ckpt))

    if not args.viz_output:
        args.viz_output = 'outputs/alpha_result.mp4'

    from common.visualization import render_animation
    render_animation(input_keypoints,
                     anim_output,
                     Skeleton(),
                     25,
                     args.viz_bitrate,
                     np.array(70., dtype=np.float32),
                     args.viz_output,
                     limit=args.viz_limit,
                     downsample=args.viz_downsample,
                     size=args.viz_size,
                     input_video_path=args.viz_video,
                     viewport=(1000, 1002),
                     input_video_skip=args.viz_skip)

    ckpt, time4 = ckpt_time(time3)
    print('total spends {:.2f} seconds'.format(ckpt))
# Example no. 4
def main(args):
    # Step 1: detect 2D keypoints
    detector_2d = get_detector_2d(args.detector_2d)

    assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'

    # 2D kpts loads or generate
    if not args.input_npz:
        video_name = args.viz_video
        keypoints = detector_2d(video_name)
    else:
        npz = np.load(args.input_npz)
        keypoints = npz['kpts']  # (N, 17, 2)

    # Step 2: lift the 2D keypoints to 3D
    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(
        keypoints_symmetry[1])
    joints_left, joints_right = list([4, 5, 6, 11, 12,
                                      13]), list([1, 2, 3, 14, 15, 16])

    # normalize keypoints, assuming fixed camera parameters (w=1000, h=1002)
    keypoints = normalize_screen_coordinates(keypoints[..., :2],
                                             w=1000,
                                             h=1002)

    model_pos = TemporalModel(17,
                              2,
                              17,
                              filter_widths=[3, 3, 3, 3, 3],
                              causal=args.causal,
                              dropout=args.dropout,
                              channels=args.channels,
                              dense=args.dense)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    ckpt, time1 = ckpt_time(time0)
    print('-------------- load data spends {:.2f} seconds'.format(ckpt))

    # load trained model
    chk_filename = os.path.join(args.checkpoint,
                                args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(
        chk_filename,
        map_location=lambda storage, loc: storage)  # map each tensor's storage to CPU
    model_pos.load_state_dict(checkpoint['model_pos'])

    ckpt, time2 = ckpt_time(time1)
    print('-------------- load 3D model spends {:.2f} seconds'.format(ckpt))

    #  Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Rendering...')
    input_keypoints = keypoints.copy()
    gen = UnchunkedGenerator(None,
                             None, [input_keypoints],
                             pad=pad,
                             causal_shift=causal_shift,
                             augment=args.test_time_augmentation,
                             kps_left=kps_left,
                             kps_right=kps_right,
                             joints_left=joints_left,
                             joints_right=joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)

    # save the 3D joint points
    np.save('outputs/test_3d_output.npy', prediction, allow_pickle=True)

    # Step 3: transform the predicted 3D points from camera coordinates to world coordinates
    # (1) first conversion method: fixed rotation quaternion
    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804],
                   dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    # We don't have the trajectory, but at least we can rebase the height:
    # subtract the minimum Z from the predictions so all heights are non-negative
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])

    # (2) second conversion method: camera parameters loaded from file (kept commented out for reference)
    # subject = 'S1'
    # cam_id = '55011271'
    # cam_params = load_camera_params('./camera/cameras.h5')[subject][cam_id]
    # R = cam_params['R']
    # T = 0
    # azimuth = cam_params['azimuth']
    #
    # prediction = camera2world(pose=prediction, R=R, T=T)
    # prediction[:, :, 2] -= np.min(prediction[:, :, 2])  # rebase the height

    # Step 4: export the 3D keypoints and convert the predictions to BVH skeletons
    # write out the predicted 3D points
    write_3d_point(args.viz_output, prediction)

    # convert the predicted 3D joints to BVH skeletons
    prediction_copy = np.copy(prediction)
    write_standard_bvh(args.viz_output, prediction_copy)  # convert to a standard BVH skeleton
    write_smartbody_bvh(args.viz_output, prediction_copy)  # convert to the BVH skeleton required by SmartBody

    anim_output = {'Reconstruction': prediction}
    input_keypoints = image_coordinates(input_keypoints[..., :2],
                                        w=1000,
                                        h=1002)

    ckpt, time3 = ckpt_time(time2)
    print(
        '-------------- generate reconstruction 3D data spends {:.2f} seconds'.
        format(ckpt))

    if not args.viz_output:
        args.viz_output = 'outputs/outputvideo/alpha_result.mp4'

    # Step 5: render the output video (currently disabled)
    # from common.visualization import render_animation
    # render_animation(input_keypoints, anim_output,
    #                  Skeleton(), 25, args.viz_bitrate, np.array(70., dtype=np.float32), args.viz_output,
    #                  limit=args.viz_limit, downsample=args.viz_downsample, size=args.viz_size,
    #                  input_video_path=args.viz_video, viewport=(1000, 1002),
    #                  input_video_skip=args.viz_skip)

    ckpt, time4 = ckpt_time(time3)
    print('total spends {:.2f} seconds'.format(ckpt))