Exemplo n.º 1
0
def test_vgnet(config_path, example_img, example_landmark, lmk_seq):
    """Run VGNet inference and write the generated frames to ``vgnet.mp4``.

    Args:
        config_path: Path of the config file passed to ``VGNet``.
        example_img: Reference image as an array accepted by ``cv2.resize``.
        example_landmark: Landmark array for the reference image; a leading
            batch axis is added here.
        lmk_seq: Landmark sequence; ``lmk_seq.shape[1]`` is used as the
            sequence length (presumably already batched -- confirm with the
            caller).

    The audio track for the output video comes from the module-level
    ``wav_file``.
    """
    # Resize to 128x128, add a batch axis, scale by 1/256 and then shift the
    # result so that 0.5 maps to 0 and the range is symmetric around zero.
    example_img = cv2.resize(example_img,
                             (128, 128)).astype(np.float32)[np.newaxis, ...]
    example_img /= 256.0
    example_img = (example_img - 0.5) / 0.5

    g2 = tf.Graph()
    with g2.as_default():
        example_landmark = tf.convert_to_tensor(
            example_landmark[np.newaxis, :], dtype=tf.float32)
        example_img = tf.convert_to_tensor(example_img, dtype=tf.float32)
        seq_len = tf.convert_to_tensor(np.array([lmk_seq.shape[1]]),
                                       dtype=tf.int32)
        lmk_seq = tf.convert_to_tensor(lmk_seq, dtype=tf.float32)

        # Inference runs one sample at a time, so force the batch size to 1
        # on the network's own parameter object.
        vgnet = VGNet(config_path)
        params = vgnet.params
        params.batch_size = 1
        vgnet.set_params(params)

        infer_nodes = vgnet.build_inference_op(lmk_seq, example_landmark,
                                               example_img, seq_len)

        # The context manager closes the session (and frees its resources)
        # even when restoring the checkpoint or running the graph fails.
        with tf.Session(graph=g2) as sess:
            sess.run(tf.global_variables_initializer())
            tf.train.Saver().restore(sess, 'ckpt_vgnet/vgnet-70000')
            img_seq = sess.run(infer_nodes['Fake_img_seq'])

    save_imgseq_video(img_seq, "vgnet.mp4", wav_file)
Exemplo n.º 2
0
 def default_hparams(config_path, name='default'):
   """Return ``YParams(config_path, name)`` with this model's defaults applied.

   Sets the ``learning_rate``, ``decay_steps`` and ``decay_rate`` entries of
   ``training`` and registers the ``landmark_size`` hyper-parameter.
   """
   hparams = YParams(config_path, name)

   training_defaults = (
       ('learning_rate', 0.001),
       ('decay_steps', 1000),
       ('decay_rate', 0.95),
   )
   for key, value in training_defaults:
     hparams.training[key] = value

   hparams.add_hparam('landmark_size', 136)
   return hparams
Exemplo n.º 3
0
    def default_hparams(config_path, name='default'):
        """Return ``YParams(config_path, name)`` with this model's defaults.

        Registers the extra hyper-parameters listed below and then overrides
        the ``learning_rate``, ``beta1`` and ``decay_rate`` entries of
        ``training``.
        """
        hparams = YParams(config_path, name)

        # Hyper-parameters added on top of what the config file provides.
        extra_hparams = (
            ('separable_conv', False),
            ('ngf', 64),
            ('ndf', 64),
            ('l1_weight', 500.0),
            ('gan_weight', 1.0),
        )
        for key, value in extra_hparams:
            hparams.add_hparam(key, value)

        # Training settings that replace the values read from the config.
        training_overrides = (
            ('learning_rate', 0.0003),
            ('beta1', 0.5),
            ('decay_rate', 0.999),
        )
        for key, value in training_overrides:
            hparams.training[key] = value

        return hparams
Exemplo n.º 4
0
def test_atnet(config_path):
    """Run ATNet inference and write the landmark sequence to ``atnet.avi``.

    The input image and audio are taken from the module-level ``img_path``
    and ``wav_file``.

    Args:
        config_path: Path of the config file passed to ``YParams`` and
            ``ATNet``.

    Returns:
        Tuple ``(example_img, example_lmk, lmk_seq)``: the image returned by
        ``face_alignment``, the adjusted example landmark array, and the
        output of the ``'LandmarkDecoder'`` inference node.
    """
    global wav_file
    global img_path
    img = cv2.imread(img_path)
    example_img, example_lmk = face_alignment(img)

    params = YParams(config_path, 'default')
    sample_rate = params.mel['sample_rate']
    hop_step = params.mel['hop_step']
    win_length = params.mel['win_length']
    frame_rate = params.frame_rate
    mean = np.load(params.mean_file)
    component = np.load(params.components_file)

    # Project the centred landmarks onto the first 20 columns of
    # ``component``, rescale selected coefficients, and project back.
    basis = component[:, :20]
    example_lmk = np.dot((example_lmk - mean), basis)
    example_lmk *= np.array([
        1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1
    ])
    example_lmk = np.dot(example_lmk, basis.T)

    wav_loader = WavLoader(sr=sample_rate)

    # Fixed control signals for up to 1000 frames: an all-zero pose and a
    # constant ``ear`` of 0.6 that drops to 0.2 for frames 40..74
    # (presumably an eye-aspect-ratio blink -- confirm against ATNet).
    pose = np.zeros([1000, 3], dtype=np.float32)
    ear = np.full([1000, 1], 0.6, dtype=np.float32)
    ear[40:75, :] = 0.2

    pcm = wav_loader.get_data(wav_file)

    frame_wav_scale = sample_rate / frame_rate
    frame_mfcc_scale = frame_wav_scale / hop_step

    assert (frame_mfcc_scale - int(frame_mfcc_scale) == 0
            ), "sample_rate/hop_step must divided by frame_rate."

    frame_mfcc_scale = int(frame_mfcc_scale)
    # ``frame_wav_scale`` is a float under true division, so the floor
    # division below yields a float. Cast to int because ``min_len`` is used
    # as a slice bound and to derive a pad width, both of which need ints.
    min_len = int(
        min(ear.shape[0], pose.shape[0], pcm.shape[0] // frame_wav_scale))

    g1 = tf.Graph()
    with g1.as_default():

        ear = tf.convert_to_tensor(ear[np.newaxis, :min_len, :],
                                   dtype=tf.float32)
        pose = tf.convert_to_tensor(pose[np.newaxis, :min_len, :],
                                    dtype=tf.float32)
        seq_len = tf.convert_to_tensor(np.array([min_len]), dtype=tf.int32)
        example_landmark = tf.convert_to_tensor(example_lmk[np.newaxis, :],
                                                dtype=tf.float32)

        # Zero-pad or truncate the waveform to exactly ``pcm_length`` samples
        # before extracting features.
        pcm_length = hop_step * (min_len * frame_mfcc_scale - 1) + win_length
        if pcm.shape[0] < pcm_length:
            pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]),
                         'constant',
                         constant_values=(0))
        elif pcm.shape[0] > pcm_length:
            pcm = pcm[:pcm_length]
        mfcc = extract_mfcc(pcm[np.newaxis, :], params)

        # Inference runs one sample at a time, so force the batch size to 1
        # on the network's own parameter object.
        atnet = ATNet(config_path)
        params = atnet.params
        params.batch_size = 1
        atnet.set_params(params)

        infer_nodes = atnet.build_inference_op(ear, pose, mfcc,
                                               example_landmark, seq_len)

        # The context manager closes the session even when restoring the
        # checkpoint or running the graph fails.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf.train.Saver().restore(sess, 'ckpt_atnet/atnet-80000')
            lmk_seq = sess.run(infer_nodes['LandmarkDecoder'])
        save_lmkseq_video(lmk_seq, mean, "atnet.avi", wav_file)

    return example_img, example_lmk, lmk_seq
Exemplo n.º 5
0
                        if (file.endswith('.jpg')):
                            count += 1

                    sample_index += 1
                    if (sample_index % (train_by_eval + 1) == 0):
                        eval_file.write("{}|{}\n".format(root, count))
                    else:
                        train_file.write("{}|{}\n".format(root, count))


if (__name__ == '__main__'):
    # Script entry point: read --config_path, load the 'default' parameters
    # from it and hand them to write_dataset.
    cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
    cmd_parser.add_option('--config_path',
                          type="string",
                          dest="config_path",
                          help='the config json file')

    opts, argv = cmd_parser.parse_args()

    if opts.config_path is not None:
        config_path = opts.config_path

        if not os.path.exists(config_path):
            logger.error('config_path not exists')
            # A missing config file is a failure: exit with a non-zero status
            # so that shells and calling scripts can detect it.
            raise SystemExit(1)

        params = YParams(config_path, 'default')
        write_dataset(params)
    else:
        print('Please check your parameters.')
Exemplo n.º 6
0
    def default_hparams(config_path, name='default'):
        """Return ``YParams(config_path, name)`` with this model's defaults.

        Registers the extra hyper-parameters listed below and then overrides
        the ``learning_rate``, ``decay_steps`` and ``decay_rate`` entries of
        ``training``.
        """
        hparams = YParams(config_path, name)

        # Hyper-parameters added on top of what the config file provides.
        extra_hparams = (
            ('thinresnet_scale', [1, 32]),
            ('thinresnet_output_channels', 256),
            ('encode_embedding_size', 128),
            ('decode_embedding_size', 128),
            ('rnn_hidden_size', 128),
            ('rnn_layers', 1),
            ('landmark_size', 136),
            ('eye_index_start', 72),
            ('eye_lmk_size', 24),
        )
        for key, value in extra_hparams:
            hparams.add_hparam(key, value)

        # Training settings that replace the values read from the config.
        training_overrides = (
            ('learning_rate', 0.001),
            ('decay_steps', 1000),
            ('decay_rate', 0.95),
        )
        for key, value in training_overrides:
            hparams.training[key] = value

        return hparams