Example #1
def handle2x(config, args):
    # resize input
    h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)
    h2, w2, scale2 = pad_to_height(config.img_size[0], args.img2_height, args.img2_width)

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # get input
    input1 = openpose2motion(args.vid1_json_dir, scale=scale1, max_frame=args.max_length)
    input2 = openpose2motion(args.vid2_json_dir, scale=scale2, max_frame=args.max_length)
    input1 = preprocess_motion2d(input1, mean_pose, std_pose)
    input2 = preprocess_motion2d(input2, mean_pose, std_pose)
    input1 = input1.to(config.device)
    input2 = input2.to(config.device)

    # transfer by network
    out12 = net.transfer(input1, input2)
    out21 = net.transfer(input2, input1)

    # postprocessing the outputs
    input1 = postprocess_motion2d(input1, mean_pose, std_pose, w1 // 2, h1 // 2)
    input2 = postprocess_motion2d(input2, mean_pose, std_pose, w2 // 2, h2 // 2)
    out12 = postprocess_motion2d(out12, mean_pose, std_pose, w2 // 2, h2 // 2)
    out21 = postprocess_motion2d(out21, mean_pose, std_pose, w1 // 2, h1 // 2)

    if not args.disable_smooth:
        out12 = gaussian_filter1d(out12, sigma=2, axis=-1)
        out21 = gaussian_filter1d(out21, sigma=2, axis=-1)

    if args.out_dir is not None:
        save_dir = args.out_dir
        ensure_dir(save_dir)
        color1 = hex2rgb(args.color1)
        color2 = hex2rgb(args.color2)
        np.savez(os.path.join(save_dir, 'results.npz'),
                 input1=input1,
                 input2=input2,
                 out12=out12,
                 out21=out21)
        if args.render_video:
            print("Generating videos...")
            motion2video(input1, h1, w1, os.path.join(save_dir, 'input1.mp4'), color1, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(input2, h2, w2, os.path.join(save_dir,'input2.mp4'), color2, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(out12, h2, w2, os.path.join(save_dir,'out12.mp4'), color2, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(out21, h1, w1, os.path.join(save_dir,'out21.mp4'), color1, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
    print("Done.")
Example #2
def train_autoencoder():
    print("found {} files".format(len(wav_files)))
    encoder = keras.models.load_model("ae/encoder-test2.h")
    decoder = keras.models.load_model("ae/decoder-test2.h")

    train_generator = NoteIsoSequence(train_wav_files,
                                      sample_duration=sample_duration,
                                      sample_rate=sample_rate,
                                      n_fft=n_fft,
                                      batch_size=batch_size,
                                      epsilon=epsilon,
                                      song_indices=song_indices,
                                      instr_indices=instr_indices,
                                      note_indices=note_indices)
    test_generator = NoteIsoSequence(test_wav_files,
                                     sample_duration=sample_duration,
                                     sample_rate=sample_rate,
                                     n_fft=n_fft,
                                     batch_size=batch_size,
                                     epsilon=epsilon,
                                     song_indices=song_indices,
                                     instr_indices=instr_indices,
                                     note_indices=note_indices)

    #     tb = keras.callbacks.TensorBoard(histogram_freq=0, write_grads=True)
    encoder, decoder, autoencoder = get_autoencoder(encoder, decoder)
    autoencoder.summary()

    now = datetime.now()
    log_dir = "logs/ae-" + now.strftime("%Y-%m-%d-%H:%M:%S") + "/"
    callbacks = [
        TensorBoardWrapper(test_generator,
                           log_dir=log_dir,
                           nb_steps=5,
                           histogram_freq=0,
                           batch_size=batch_size,
                           write_graph=False,
                           write_grads=True,
                           write_images=False)
    ]

    autoencoder.fit_generator(generator=train_generator,
                              validation_data=test_generator,
                              use_multiprocessing=use_multiprocessing,
                              workers=workers,
                              epochs=epochs,
                              steps_per_epoch=steps_per_epoch,
                              validation_steps=validation_steps,
                              callbacks=callbacks)

    print("saving model...")
    encoder.save("ae/encoder-test2.h")
    decoder.save("ae/decoder-test2.h")
    autoencoder.save("ae/ae-test2.h")
    print("saved autoencoder.")
Example #3
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n',
                        '--name',
                        type=str,
                        choices=['skeleton', 'view', 'full'],
                        required=True,
                        help='which structure to use.')
    parser.add_argument('-p',
                        '--model_path',
                        type=str,
                        default="model/pretrained_view.pth")
    parser.add_argument('--phase',
                        type=str,
                        default="test",
                        choices=['train', 'test'])
    parser.add_argument('-g',
                        '--gpu_ids',
                        type=int,
                        default=0,
                        required=False,
                        help="specify gpu ids")
    args = parser.parse_args()

    # set config
    config.initialize(args)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_ids)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # get dataset
    train_ds = MixamoDatasetForFull(args.phase, config)
    cluster_data = train_ds.get_cluster_data()

    # score, img = cluster_body(net, cluster_data, device, './cluster_body.png')
    if args.name == 'view':
        cluster_view(net, cluster_data, device, './cluster_view.png')
        cluster_motion(net, cluster_data, device, './cluster_motion.png')
    elif args.name == 'skeleton':
        cluster_body(net, cluster_data, device, './cluster_body.png')
        cluster_motion(net,
                       cluster_data,
                       device,
                       './cluster_motion.png',
                       mode='body')
    else:
        cluster_motion(net, cluster_data, device, './cluster_motion.png')
Example #4
def main():
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_steps=1000)

    hparams = tf.contrib.training.HParams(type="image",
                                          batch_size=64,
                                          learning_rate=0.01,
                                          lr_scheme="exp",
                                          delay=0,
                                          staircased=False,
                                          learning_rate_decay_interval=2000,
                                          learning_rate_decay_rate=0.1,
                                          clip_grad_norm=1.0,
                                          l2_loss=0.0,
                                          label_smoothing=0.1,
                                          init_scheme="random",
                                          warmup_steps=10000,
                                          encoder_depth=2,
                                          decoder_depth=2,
                                          hidden_size=100,
                                          is_ae=True,
                                          activation=tf.nn.sigmoid,
                                          enc_layers=[50, 50],
                                          dec_layers=[50],
                                          label_shape=[1],
                                          dropout=0,
                                          channels=1,
                                          input_shape=[28, 28, 1],
                                          output_shape=[28, 28, 1])

    train_input_fn = get_mnist("tmp/data", hparams, training=True)
    eval_input_fn = get_mnist("tmp/data", hparams, training=False)

    estimator = tf.estimator.Estimator(model_fn=get_autoencoder(hparams, 0.01),
                                       model_dir="tmp/run",
                                       config=run_config)

    estimator.train(train_input_fn, steps=100)
    estimator.evaluate(eval_input_fn, steps=10)
Example #5
def motion_feature_extract(config, args):
    # resize input
    h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)
    
    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # get input
    input1 = openpose2motion(args.vid1_json_dir, scale=scale1, max_frame=args.max_length)
    print("after motion")
    print(input1.shape)
    input1 = preprocess_motion2d(input1, mean_pose, std_pose)
    print("after preprocess")
    print(input1.shape)

    if args.out_dir is not None:
        save_dir = args.out_dir
        ensure_dir(save_dir)
    #     color1 = hex2rgb(args.color1)
    #     color2 = hex2rgb(args.color2)
        np.savez(os.path.join(save_dir, 'pose_feature.npz'), pose=input1)

    input1 = input1.to(config.device)

    # transfer by network
    # out = net.transfer_three(input1, input2, input3)
    out = net.forward(input1)
    mot = net.mot_encoder(input1)
    print(mot.shape)
    # postprocessing the outputs
    input1 = postprocess_motion2d(input1, mean_pose, std_pose, w1 // 2, h1 // 2)
    out = postprocess_motion2d(out, mean_pose, std_pose, w1 // 2, h1 // 2)
    print("after postprocess")
    print(input1.shape)

    if not args.disable_smooth:
        out = gaussian_filter1d(out, sigma=2, axis=-1)

    # (optional) saving full results with np.savez and rendering videos with
    # motion2video is disabled in this script.

    print("Done.")
Example #6
from generator_pair import Dual_Track_Generator
from loss import (pair_loss, cumulative_point_distance_error, mean_point_distance_error, 
                  pair_cumulative_point_distance_error, pair_mean_point_distance_error, 
                  loss_weight_adjustments, base_loss_function)

from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.optimizers import Adam

# input shape of 2 poses/frame
input_shape = (2, 2 * number_of_coordinates)

# layer_sizes = [30, 20, 10]
# layer_sizes = [128, 64, 10]
# layer_sizes = [256, 128, 64, 10]
layer_sizes = [512, 256, 128, 64, 10]
autoencoder, encoder, decoder = get_autoencoder(input_shape, layer_sizes, is_variational=False, verbose=True)
model = get_sequence_model(autoencoder, (input_shape), number_of_frames=number_of_frames, factor=2, verbose=True)

sequences_df = pd.read_csv("pose_pair_sequences.csv", sep=",", header=0, index_col=None)
print(len(sequences_df), "records:")
print(sequences_df.head(5))

# get all counts
print()
print("Counts:")
counts_df = sequences_df.groupby(["sequence_id"], as_index=False).count().loc[:, ["sequence_id", "step"]]
print(counts_df.head(5))

# extract all sequence IDs with at least ${number_of_frames} steps
print()
print("Suitable training sequences:")
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--name', type=str, choices=['skeleton', 'view', 'full'], required=True,
                        help='which structure to use')
    # parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False, help="specify gpu ids")
    parser.add_argument('--disable_triplet', action='store_true', default=False, help="disable triplet loss")
    parser.add_argument('--use_footvel_loss', action='store_true', default=False, help="use footvel loss")
    parser.add_argument('--vis', action='store_true', default=False, help="visualize output in training")
    args = parser.parse_args()

    config.initialize(args)

    net = get_autoencoder(config)
    print(net)
    net = net.to(config.device)

    # create tensorboard writer
    train_tb = SummaryWriter(os.path.join(config.log_dir, 'train.events'))
    val_tb = SummaryWriter(os.path.join(config.log_dir, 'val.events'))

    # create dataloader
    train_loader = get_dataloader('train', config, config.batch_size, config.num_workers)
    mean_pose, std_pose = train_loader.dataset.mean_pose, train_loader.dataset.std_pose
    val_loader = get_dataloader('test', config, config.batch_size, config.num_workers)
    val_loader = cycle(val_loader)

    # create training agent
    tr_agent = get_training_agent(config, net)
    clock = tr_agent.clock

    # start training
    for e in range(config.nr_epochs):

        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)

            losses_values = {k:v.item() for k, v in losses.items()}

            # record loss to tensorboard
            for k, v in losses_values.items():
                train_tb.add_scalar(k, v, clock.step)

            # visualize
            if args.vis and clock.step % config.visualize_frequency == 0:
                imgs = visulize_motion_in_training(outputs, mean_pose, std_pose)
                for k, img in imgs.items():
                    train_tb.add_image(k, torch.from_numpy(img), clock.step)

            pbar.set_description("EPOCH[{}][{}/{}]".format(e, b, len(train_loader)))
            pbar.set_postfix(OrderedDict({"loss": sum(losses_values.values())}))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)

                outputs, losses = tr_agent.val_func(data)

                losses_values = {k: v.item() for k, v in losses.items()}

                for k, v in losses_values.items():
                    val_tb.add_scalar(k, v, clock.step)

                if args.vis and clock.step % config.visualize_frequency == 0:
                    imgs = visulize_motion_in_training(outputs, mean_pose, std_pose)
                    for k, img in imgs.items():
                        val_tb.add_image(k, torch.from_numpy(img), clock.step)

            clock.tick()

        train_tb.add_scalar('learning_rate', tr_agent.optimizer.param_groups[-1]['lr'], clock.epoch)
        tr_agent.update_learning_rate()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_network()
        tr_agent.save_network('latest.pth.tar')

        clock.tock()
Example #8
def handle2x(config, args):
    w1 = h1 = w2 = h2 = 512

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # get input
    dataloader = get_dataloader('test', config)
    input1 = dataloader.dataset.preprocessing(args.path1,
                                              args.view1).unsqueeze(0)
    input2 = dataloader.dataset.preprocessing(args.path2,
                                              args.view2).unsqueeze(0)
    input1 = input1.to(config.device)
    input2 = input2.to(config.device)

    # transfer by network
    out12 = net.transfer(input1, input2)
    out21 = net.transfer(input2, input1)

    # postprocessing the outputs
    input1 = postprocess_motion2d(input1, mean_pose, std_pose, w1 // 2,
                                  h1 // 2)
    input2 = postprocess_motion2d(input2, mean_pose, std_pose, w2 // 2,
                                  h2 // 2)
    out12 = postprocess_motion2d(out12, mean_pose, std_pose, w2 // 2, h2 // 2)
    out21 = postprocess_motion2d(out21, mean_pose, std_pose, w1 // 2, h1 // 2)

    if not args.disable_smooth:
        out12 = gaussian_filter1d(out12, sigma=2, axis=-1)
        out21 = gaussian_filter1d(out21, sigma=2, axis=-1)

    if args.out_dir is not None:
        save_dir = args.out_dir
        ensure_dir(save_dir)
        color1 = hex2rgb(args.color1)
        color2 = hex2rgb(args.color2)
        np.savez(os.path.join(save_dir, 'results.npz'),
                 input1=input1,
                 input2=input2,
                 out12=out12,
                 out21=out21)
        if args.render_video:
            print("Generating videos...")
            motion2video(input1,
                         h1,
                         w1,
                         os.path.join(save_dir, 'input1.mp4'),
                         color1,
                         args.transparency,
                         fps=args.fps,
                         save_frame=args.save_frame)
            motion2video(input2,
                         h2,
                         w2,
                         os.path.join(save_dir, 'input2.mp4'),
                         color2,
                         args.transparency,
                         fps=args.fps,
                         save_frame=args.save_frame)
            motion2video(out12,
                         h2,
                         w2,
                         os.path.join(save_dir, 'out12.mp4'),
                         color2,
                         args.transparency,
                         fps=args.fps,
                         save_frame=args.save_frame)
            motion2video(out21,
                         h1,
                         w1,
                         os.path.join(save_dir, 'out21.mp4'),
                         color1,
                         args.transparency,
                         fps=args.fps,
                         save_frame=args.save_frame)
    print("Done.")
    parser.add_argument('--transparency', action='store_true', help="make background transparent in resulting frames")
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False)
    args = parser.parse_args()

    config.initialize(args)

    # if keep no attribute, interpolate over all three latent space
    if args.keep_attr == 'none':
        assert args.form == 'line'

    # clip and pad the video
    h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)
    h2, w2, scale2 = pad_to_height(config.img_size[0], args.img2_height, args.img2_width)

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # process input data
    input1 = openpose2motion(args.vid1_json_dir, scale=scale1, max_frame=args.max_length)
    input2 = openpose2motion(args.vid2_json_dir, scale=scale2, max_frame=args.max_length)
    if input1.shape[-1] != input2.shape[-1]:
        length = min(input1.shape[-1], input2.shape[-1])
        input1 = input1[:, :, :length]
        input2 = input2[:, :, :length]
    input1 = preprocess_motion2d(input1, mean_pose, std_pose)