Example #1
def test_backward(ver, gene, n_warmup=11, n_work=121):
    import tensorflow as tf
    import numpy as np
    from mir_util import infer
    import config as cfg
    import sys, time
    import netop
    sys.is_train = True  # Train-mode flag stashed globally on the sys module

    batch_size = 1
    n_feature = 5644 // 2

    graph = tf.Graph()
    with graph.as_default():
        # Model
        p_input = tf.random.uniform((batch_size, 64, n_feature, 1),
                                    dtype=tf.float32,
                                    name="p_input")
        p_target = tf.random.uniform((batch_size, 64, n_feature, 2),
                                     dtype=tf.float32,
                                     name="p_target")
        v_pred = infer(p_input, 2, True, ver=ver, gene=gene)
        v_loss = tf.reduce_mean(input_tensor=tf.abs(p_target - v_pred),
                                name="loss0")
        op_optim = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-4).minimize(v_loss)

        n_param = netop.count_parameter()
        n_forward_flop = tf.compat.v1.profiler.profile(
            graph,
            options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(
            )).total_float_ops
        print(" BWD :Total {:,} parameters in total".format(n_param))
        print(" BWD :Forward + backward operation needs {:,} FLOPS".format(
            n_forward_flop))

        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            for i_step in range(n_warmup + n_work):
                sess.run([v_loss, op_optim])
                if i_step == n_warmup - 1:
                    # Start the clock only once the warmup steps finish,
                    # so exactly n_work steps are timed.
                    t = time.time()
        t_train = (time.time() - t) / n_work
        return t_train
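
The warmup-then-measure pattern above is worth noting on its own: the first iterations of a TensorFlow session are dominated by one-off costs (memory allocation, kernel autotuning), so only steady-state steps should be averaged. A minimal sketch of the same idea with the TensorFlow parts abstracted away; step_fn is a hypothetical stand-in for the sess.run call:

def time_steps(step_fn, n_warmup=11, n_work=121):
    import time
    for _ in range(n_warmup):   # let allocators, caches and autotuning settle
        step_fn()
    t = time.time()
    for _ in range(n_work):     # average only steady-state iterations
        step_fn()
    return (time.time() - t) / n_work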
Example #2
def test_forward(ver, gene, n_warmup=11, n_work=121):
    import tensorflow as tf
    import numpy as np
    from mir_util import infer
    import config as cfg
    import sys, time
    import netop
    sys.path.append("../lib")
    sys.is_train = False  # Eval-mode flag stashed globally on the sys module

    batch_size = 1
    n_feature = 5644 // 2

    graph = tf.Graph()
    with graph.as_default():
        # Model
        print("Initialize network")
        with tf.device("/device:GPU:0"):
            p_input = tf.random.uniform((batch_size, 64, n_feature, 1),
                                        dtype=tf.float32,
                                        name="p_input")
            v_pred = infer(p_input, 2, False, ver=ver, gene=gene)

        n_param = netop.count_parameter()
        n_forward_flop = tf.compat.v1.profiler.profile(
            graph,
            options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(
            )).total_float_ops
        print(" FWD :Total {:,} parameters in total".format(n_param))
        print(
            " FWD :Forward operation needs {:,} FLOPS".format(n_forward_flop))

        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialized, Load state
            sess.run(tf.compat.v1.global_variables_initializer())
            for step in range(n_warmup + n_work):
                sess.run(v_pred)
                if step == n_warmup - 1:
                    # Time only the n_work steady-state steps.
                    t = time.time()
        t_eval = (time.time() - t) / n_work
        return t_eval
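
Both benchmarks get their FLOP counts from tf.compat.v1.profiler.profile, which statically walks whatever is in the graph at the time of the call. A minimal sketch on a toy graph (the matmul shapes are arbitrary):

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    a = tf.random.uniform((64, 128))
    b = tf.random.uniform((128, 32))
    c = tf.matmul(a, b)  # counted as 2 * 64 * 128 * 32 float ops
    n_flop = tf.compat.v1.profiler.profile(
        graph,
        options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
    ).total_float_ops
    print("{:,} FLOPs".format(n_flop))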
Example #3
def count(ver, gene_int, n_ch):
  import tensorflow as tf
  from mir_util import infer
  import config as cfg
  import netop

  with cfg.ConfigBoundary(gene_ver=ver, gene_value=gene_int):
    batch_size = 1
    graph = tf.Graph()
    run_meta = tf.compat.v1.RunMetadata()
    with graph.as_default():
      x_mixed = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, 64, cfg.frame_size // 2, 1), name="x_mixed")
      y_mixed = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, 64, cfg.frame_size // 2, n_ch), name="y_mixed")
      y_pred = infer(x_mixed, n_ch, True)
      n_forward_flop = tf.compat.v1.profiler.profile(graph, run_meta=run_meta, cmd="op", options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()).total_float_ops
      y_output = tf.multiply(x_mixed, y_pred)
      loss_fn = tf.reduce_mean(input_tensor=tf.abs(y_mixed - y_output), name="loss0")
      global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")
      optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4).minimize(loss_fn, global_step=global_step)

      # Profiling again after minimize() also counts the backward-pass ops,
      # though this total is not returned below.
      n_total_flop = tf.compat.v1.profiler.profile(graph, run_meta=run_meta, cmd="op", options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()).total_float_ops
      total_parameters = netop.count_parameter()
      return total_parameters, n_forward_flop
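
The key detail in count() is when profile() runs: the first call happens before minimize() has added gradient ops, so n_forward_flop covers the forward pass only, while the second call (n_total_flop) also sees the backward pass. That is the same reason Example #1 labels its post-optimizer number "forward + backward" while Example #2 reports forward-only FLOPs. A hypothetical call, assuming ver and gene_int values the project's config accepts:

n_param, n_fwd_flop = count(ver=2, gene_int=12345, n_ch=2)
print("{:,} parameters, {:,} forward FLOPs".format(n_param, n_fwd_flop))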
Example #4
def eval_gene_core(gene):
    import time
    import random
    import tensorflow as tf
    import numpy as np
    import librosa
    from eval_util import bss_eval_sdr
    # geneop, netop, cfg, n_out_channel, train_seg_list and eval_seg_list
    # come from the enclosing module's scope.
    print(" :GENE: %d" % (geneop.cvtlstint(gene), ))
    n_feature = cfg.n_feature
    tf.compat.v1.reset_default_graph()
    graph = tf.Graph()
    t = time.time()

    with graph.as_default():
        random.seed(0x41526941)
        np.random.seed(0x41526941)
        tf.compat.v1.random.set_random_seed(0x41526941)
        sess_conf = tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                allow_growth=True, per_process_gpu_memory_fraction=1.0),
            allow_soft_placement=True,
        )
        with tf.compat.v1.Session(config=sess_conf) as sess:
            # TRAIN
            p_feature = tf.compat.v1.placeholder(tf.float32,
                                                 shape=(cfg.batch_size,
                                                        cfg.n_hop_per_sample,
                                                        n_feature, 1),
                                                 name='x_mixed')
            p_target = tf.compat.v1.placeholder(
                tf.float32,
                shape=(cfg.batch_size, cfg.n_hop_per_sample, n_feature,
                       n_out_channel),
                name='y_mixed')
            v_pred = geneop.build_from_gene(p_feature, n_out_channel, gene)

            n_param = netop.count_parameter()
            print(" :Total {:,} parameters in total".format(n_param))
            if "neg_gflops" in cfg.result_format:
                n_forward_flop = tf.compat.v1.profiler.profile(
                    graph,
                    options=tf.compat.v1.profiler.ProfileOptionBuilder.
                    float_operation()).total_float_ops
                print(" :Forward operation needs {:,} FLOPS".format(
                    n_forward_flop))

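            # The network emits a soft mask; clip it to [0, 1] and apply it
            # to the mixture spectrogram to get the separated estimate.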
            v_pred_clipped = tf.clip_by_value(v_pred, 0.0, 1.0) * p_feature
            v_loss = tf.reduce_mean(input_tensor=tf.abs(v_pred * p_feature -
                                                        p_target))

            v_step = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name="step")
            p_lr_fac = tf.compat.v1.placeholder(tf.float32, name="p_lr_fac")
            v_lr = p_lr_fac * tf.compat.v1.train.cosine_decay_restarts(
                cfg.max_lr,
                v_step,
                cfg.first_lr_period,
                alpha=cfg.min_lr / cfg.max_lr,
                t_mul=2.0)
            op_optim = tf.compat.v1.train.AdamOptimizer(
                learning_rate=v_lr).minimize(v_loss, global_step=v_step)

            sess.run(tf.compat.v1.global_variables_initializer())
            loss_list = []
            data_feature = np.zeros(
                (cfg.batch_size, cfg.n_hop_per_sample, n_feature, 1))
            data_target = np.zeros((cfg.batch_size, cfg.n_hop_per_sample,
                                    n_feature, n_out_channel))
            seg_idx_list = np.array([])
            for i_step in range(cfg.n_step):
                for i_batch in range(cfg.batch_size):
                    if seg_idx_list.size == 0:
                        seg_idx_list = np.random.permutation(
                            len(train_seg_list))
                    idx = seg_idx_list[0]
                    seg_idx_list = seg_idx_list[1:]
                    spec_mixed, spec_vocal, spec_inst = train_seg_list[idx]
                    start_idx = np.random.randint(
                        0,
                        len(spec_mixed) - cfg.n_hop_per_sample)
                    data_feature[i_batch, :, :,
                                 0] = spec_mixed[start_idx:start_idx +
                                                 cfg.n_hop_per_sample, :]
                    data_target[i_batch, :, :,
                                0] = spec_inst[start_idx:start_idx +
                                               cfg.n_hop_per_sample, :]
                    data_target[i_batch, :, :,
                                1] = spec_vocal[start_idx:start_idx +
                                                cfg.n_hop_per_sample, :]
                if i_step <= cfg.warmup_period:
                    lr_fac = cfg.warmup_fac  # Slow start so early updates cannot blow the weights up
                else:
                    lr_fac = 1.0
                loss_value, _ = sess.run(
                    [v_loss, op_optim],
                    feed_dict={
                        p_feature: data_feature,
                        p_target: data_target,
                        p_lr_fac: lr_fac
                    })
                loss_list.append(loss_value)
            # EVAL
            sdr_list = []
            valid_sdr_list = []
            ret_list = []
            for i_eval, (real_vocal, real_inst, magn_orig_list, phase_list,
                         norm_fac) in enumerate(eval_seg_list):
                n_hop, _ = magn_orig_list.shape
                magn_inst_list = np.zeros_like(magn_orig_list,
                                               dtype=np.float32)
                magn_vocal_list = np.zeros_like(magn_orig_list,
                                                dtype=np.float32)
                data_feature = np.zeros(
                    (cfg.batch_size, cfg.n_hop_per_sample, n_feature, 1),
                    dtype=np.float32)
                batch_hop_list = []

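                # Windows are collected into a fixed-size batch; flush_buffer
                # runs the network once per batch and scatters each output back
                # into the full-length spectrograms, keeping only the
                # (offset_begin, offset_end) slice of every window so that
                # overlapping windows do not double-write.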
                def flush_buffer():
                    pred_value, = sess.run([v_pred_clipped],
                                           feed_dict={p_feature: data_feature})
                    for i_batch, (i_batch_hop, offset_begin,
                                  offset_end) in enumerate(batch_hop_list):
                        magn_inst_list[i_batch_hop + offset_begin:i_batch_hop +
                                       offset_end, :-1] = pred_value[
                                           i_batch, offset_begin:offset_end, :,
                                           0]
                        magn_vocal_list[i_batch_hop +
                                        offset_begin:i_batch_hop +
                                        offset_end, :-1] = pred_value[
                                            i_batch,
                                            offset_begin:offset_end, :, 1]
                    data_feature.fill(0.0)
                    batch_hop_list.clear()

                def enqueue_buffer(data, i_batch_hop, offset_begin,
                                   offset_end):
                    if len(batch_hop_list) == cfg.batch_size:
                        flush_buffer()
                    i_batch = len(batch_hop_list)
                    data_feature[i_batch, :data.shape[0], :, 0] = data
                    batch_hop_list.append(
                        (i_batch_hop, offset_begin, offset_end))

                i_hop = 0
                while i_hop + cfg.n_hop_per_sample < n_hop:
                    data = magn_orig_list[i_hop:i_hop +
                                          cfg.n_hop_per_sample, :-1]
                    if i_hop == 0:
                        enqueue_buffer(data, i_hop, 0,
                                       cfg.n_hop_per_sample * 3 // 4)
                    else:
                        enqueue_buffer(data, i_hop, cfg.n_hop_per_sample // 4,
                                       cfg.n_hop_per_sample * 3 // 4)
                    i_hop += cfg.n_hop_per_sample // 2
                data = magn_orig_list[i_hop:, :-1]
                enqueue_buffer(data, i_hop, cfg.n_hop_per_sample // 4,
                               n_hop - i_hop)
                flush_buffer()
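                # Back to waveforms: reapply the mixture phase, undo the
                # normalization, then inverse-STFT.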
                unit_magn = np.exp(1j * phase_list)
                fake_inst = librosa.istft(
                    (magn_inst_list * unit_magn * norm_fac).T,
                    hop_length=cfg.hop_size)
                fake_vocal = librosa.istft(
                    (magn_vocal_list * unit_magn * norm_fac).T,
                    hop_length=cfg.hop_size)
                if (np.abs(fake_inst) <= 1e-8).all() or (np.abs(fake_vocal) <= 1e-8).all():
                    # A (near-)silent reconstruction makes SDR meaningless.
                    sdr_list.append(-999999)
                else:
                    #saveWav("fakeinst.wav", fake_inst, cfg.work_sr)
                    #saveWav("fakevocal.wav", fake_vocal, cfg.work_sr)
                    ret_list.append(
                        cfg.pool.apply_async(bss_eval_sdr, (
                            np.array([real_inst], dtype=np.float32),
                            np.array([fake_inst], dtype=np.float32),
                        )))
                    ret_list.append(
                        cfg.pool.apply_async(bss_eval_sdr, (
                            np.array([real_vocal], dtype=np.float32),
                            np.array([fake_vocal], dtype=np.float32),
                        )))
            ret_list = [x.get()[0] for x in ret_list]
            for i_eval, sdr in enumerate(zip(ret_list[::2], ret_list[1::2])):
                mean_sdr = np.mean(sdr)
                if i_eval < cfg.n_eval:
                    sdr_list.append(mean_sdr)
                else:
                    valid_sdr_list.append(mean_sdr)

            result_list = []
            for result_type in cfg.result_format:
                if result_type == "sdr":
                    result_list.append(np.mean(sdr_list))
                elif result_type == "neg_mega_pc":
                    result_list.append(-n_param / 1_000_000.0)
                elif result_type == "neg_gflops":
                    result_list.append(-n_forward_flop / 1_000_000_000.0)
                elif result_type == "valid_sdr":
                    result_list.append(np.mean(valid_sdr_list))
                else:
                    raise ValueError("Unsupported result_type `%s`" %
                                     (result_type, ))
            print("  EVAL RESULT: t=%.2f, train_loss=%.09f, result=%r" %
                  (time.time() - t, np.mean(loss_list), result_list))
            return result_list
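
The evaluation loop above slides a window over the spectrogram with a hop of n_hop_per_sample // 2 and keeps only the centre half of each prediction (plus the head of the first window and the tail of the last one). That stitching logic can be isolated into a small NumPy sketch; process is a hypothetical stand-in for the network, and the :-1 frequency-bin cropping of the original is omitted:

import numpy as np

def stitch(spec, win, process):
    """spec: (n_hop, n_feat); process maps a (win, n_feat) window to an
    output of the same shape. Hop by win // 2, keep each window's centre."""
    n_hop = spec.shape[0]
    out = np.zeros_like(spec)
    i = 0
    while i + win < n_hop:
        pred = process(spec[i:i + win])
        lo = 0 if i == 0 else win // 4        # first window keeps its head
        out[i + lo:i + win * 3 // 4] = pred[lo:win * 3 // 4]
        i += win // 2
    # Last (zero-padded) window keeps everything from win // 4 to the end.
    pred = process(np.pad(spec[i:], ((0, win - (n_hop - i)), (0, 0))))
    out[i + win // 4:] = pred[win // 4:n_hop - i]
    return out

# With an identity "network" the stitched output must reproduce the input:
spec = np.random.rand(1000, 64).astype(np.float32)
assert np.allclose(stitch(spec, 64, lambda x: x), spec)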