def test_backward(ver, gene, n_warmup=11, n_work=121):
    import tensorflow as tf
    import numpy as np
    from mir_util import infer
    import config as cfg
    import sys, time
    import netop

    sys.is_train = True
    batch_size = 1
    n_feature = 5644 // 2
    graph = tf.Graph()
    with graph.as_default():
        # Model
        p_input = tf.random.uniform((batch_size, 64, n_feature, 1),
                                    dtype=tf.float32,
                                    name="p_input")
        p_target = tf.random.uniform((batch_size, 64, n_feature, 2),
                                     dtype=tf.float32,
                                     name="p_target")
        v_pred = infer(p_input, 2, True, ver=ver, gene=gene)
        v_loss = tf.reduce_mean(input_tensor=tf.abs(p_target - v_pred),
                                name="loss0")
        op_optim = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-4).minimize(v_loss)
        n_param = netop.count_parameter()
        n_forward_flop = tf.compat.v1.profiler.profile(
            graph,
            options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(
            )).total_float_ops
        print(" BWD :Total {:,} parameters in total".format(n_param))
        print(" BWD :Forward + backward operation needs {:,} FLOPS".format(
            n_forward_flop))
        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            for i_step in range(n_warmup + n_work):
                sess.run([v_loss, op_optim])
                if i_step == n_warmup:
                    t = time.time()
            t_train = (time.time() - t) / n_work
    return t_train
def test_forward(ver, gene, n_warmup=11, n_work=121):
    import tensorflow as tf
    import numpy as np
    from mir_util import infer
    import config as cfg
    import sys, time
    import netop

    sys.path.append("../lib")
    sys.is_train = False
    batch_size = 1
    n_feature = 5644 // 2
    graph = tf.Graph()
    with graph.as_default():
        # Model
        print("Initialize network")
        with tf.device("/device:GPU:0"):
            p_input = tf.random.uniform((batch_size, 64, n_feature, 1),
                                        dtype=tf.float32,
                                        name="p_input")
            v_pred = infer(p_input, 2, False, ver=ver, gene=gene)
        n_param = netop.count_parameter()
        n_forward_flop = tf.compat.v1.profiler.profile(
            graph,
            options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(
            )).total_float_ops
        print(" FWD :Total {:,} parameters in total".format(n_param))
        print(" FWD :Forward operation needs {:,} FLOPS".format(n_forward_flop))
        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialized, Load state
            sess.run(tf.compat.v1.global_variables_initializer())
            for step in range(n_warmup + n_work):
                sess.run(v_pred)
                if step == n_warmup:
                    t = time.time()
            t_eval = (time.time() - t) / n_work
    return t_eval
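# Minimal usage sketch (assumption, not part of the original pipeline): time a single
# candidate architecture with the two benchmark helpers above. The warm-up steps are
# excluded from the per-step averages inside those helpers; the `ver`/`gene` values a
# caller passes here come from the search code and are placeholders in this sketch.
def _benchmark_gene_sketch(ver, gene):
    t_fwd = test_forward(ver, gene)    # seconds per forward step (post warm-up average)
    t_bwd = test_backward(ver, gene)   # seconds per forward + backward step
    print("FWD %.4f s/step, BWD %.4f s/step" % (t_fwd, t_bwd))
    return t_fwd, t_bwd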
def count(ver, gene_int, n_ch):
    import tensorflow as tf
    from mir_util import infer
    import config as cfg
    import netop

    with cfg.ConfigBoundary(gene_ver=ver, gene_value=gene_int):
        batch_size = 1
        graph = tf.Graph()
        run_meta = tf.compat.v1.RunMetadata()
        with graph.as_default():
            x_mixed = tf.compat.v1.placeholder(
                tf.float32,
                shape=(batch_size, 64, cfg.frame_size // 2, 1),
                name="x_mixed")
            y_mixed = tf.compat.v1.placeholder(
                tf.float32,
                shape=(batch_size, 64, cfg.frame_size // 2, n_ch),
                name="y_mixed")
            y_pred = infer(x_mixed, n_ch, True)
            # FLOPs of the forward graph only (profiled before any optimizer ops exist)
            n_forward_flop = tf.compat.v1.profiler.profile(
                graph,
                run_meta=run_meta,
                cmd="op",
                options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(
                )).total_float_ops
            y_output = tf.multiply(x_mixed, y_pred)
            loss_fn = tf.reduce_mean(input_tensor=tf.abs(y_mixed - y_output),
                                     name="loss0")
            global_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="global_step")
            optimizer = tf.compat.v1.train.AdamOptimizer(
                learning_rate=1e-4).minimize(loss_fn, global_step=global_step)
            # FLOPs of the full graph, now including the backward pass / optimizer
            n_total_flop = tf.compat.v1.profiler.profile(
                graph,
                run_meta=run_meta,
                cmd="op",
                options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(
                )).total_float_ops
            total_parameters = netop.count_parameter()
    return total_parameters, n_forward_flop
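# Usage sketch (assumption): report model size and forward cost for one candidate.
# `count` returns (trainable parameter count, forward-pass FLOPs of the profiled graph);
# the unit conversions below mirror the "neg_mega_pc" / "neg_gflops" scaling used in
# eval_gene_core, and the argument values a caller supplies are hypothetical here.
def _report_complexity_sketch(ver, gene_int, n_ch=2):
    n_param, n_fwd_flop = count(ver, gene_int, n_ch)
    print("params: %.3f M, forward: %.3f GFLOPs" %
          (n_param / 1_000_000.0, n_fwd_flop / 1_000_000_000.0))
    return n_param, n_fwd_flop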
def eval_gene_core(gene):
    import time
    import tensorflow as tf
    import numpy as np
    from eval_util import bss_eval_sdr

    print(" :GENE: %d" % (geneop.cvtlstint(gene), ))
    n_feature = cfg.n_feature
    tf.compat.v1.reset_default_graph()
    graph = tf.Graph()
    t = time.time()
    with graph.as_default():
        random.seed(0x41526941)
        np.random.seed(0x41526941)
        tf.compat.v1.random.set_random_seed(0x41526941)
        sess_conf = tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                allow_growth=True, per_process_gpu_memory_fraction=1.0),
            allow_soft_placement=True,
        )
        with tf.compat.v1.Session(config=sess_conf) as sess:
            # TRAIN
            p_feature = tf.compat.v1.placeholder(
                tf.float32,
                shape=(cfg.batch_size, cfg.n_hop_per_sample, n_feature, 1),
                name='x_mixed')
            p_target = tf.compat.v1.placeholder(
                tf.float32,
                shape=(cfg.batch_size, cfg.n_hop_per_sample, n_feature,
                       n_out_channel),
                name='y_mixed')
            v_pred = geneop.build_from_gene(p_feature, n_out_channel, gene)
            n_param = netop.count_parameter()
            print(" :Total {:,} parameters in total".format(n_param))
            if "neg_gflops" in cfg.result_format:
                n_forward_flop = tf.compat.v1.profiler.profile(
                    graph,
                    options=tf.compat.v1.profiler.ProfileOptionBuilder.
                    float_operation()).total_float_ops
                print(" :Forward operation needs {:,} FLOPS".format(
                    n_forward_flop))
            v_pred_clipped = tf.clip_by_value(v_pred, 0.0, 1.0) * p_feature
            v_loss = tf.reduce_mean(
                input_tensor=tf.abs(v_pred * p_feature - p_target))
            v_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="step")
            p_lr_fac = tf.compat.v1.placeholder(tf.float32, name="p_lr_fac")
            v_lr = p_lr_fac * tf.compat.v1.train.cosine_decay_restarts(
                cfg.max_lr,
                v_step,
                cfg.first_lr_period,
                alpha=cfg.min_lr / cfg.max_lr,
                t_mul=2.0)
            op_optim = tf.compat.v1.train.AdamOptimizer(
                learning_rate=v_lr).minimize(v_loss, global_step=v_step)
            sess.run(tf.compat.v1.global_variables_initializer())

            loss_list = []
            data_feature = np.zeros(
                (cfg.batch_size, cfg.n_hop_per_sample, n_feature, 1))
            data_target = np.zeros(
                (cfg.batch_size, cfg.n_hop_per_sample, n_feature, n_out_channel))
            seg_idx_list = np.array([])
            for i_step in range(cfg.n_step):
                for i_batch in range(cfg.batch_size):
                    if seg_idx_list.size == 0:
                        seg_idx_list = np.random.permutation(
                            len(train_seg_list))
                    idx = seg_idx_list[0]
                    seg_idx_list = seg_idx_list[1:]
                    spec_mixed, spec_vocal, spec_inst = train_seg_list[idx]
                    start_idx = np.random.randint(
                        0, len(spec_mixed) - cfg.n_hop_per_sample)
                    data_feature[i_batch, :, :, 0] = spec_mixed[
                        start_idx:start_idx + cfg.n_hop_per_sample, :]
                    data_target[i_batch, :, :, 0] = spec_inst[
                        start_idx:start_idx + cfg.n_hop_per_sample, :]
                    data_target[i_batch, :, :, 1] = spec_vocal[
                        start_idx:start_idx + cfg.n_hop_per_sample, :]
                if i_step <= cfg.warmup_period:
                    lr_fac = cfg.warmup_fac  # Slow start so early updates do not blow up
                else:
                    lr_fac = 1.0
                loss_value, _ = sess.run(
                    [v_loss, op_optim],
                    feed_dict={
                        p_feature: data_feature,
                        p_target: data_target,
                        p_lr_fac: lr_fac
                    })
                loss_list.append(loss_value)

            # EVAL
            sdr_list = []
            valid_sdr_list = []
            ret_list = []
            for i_eval, (real_vocal, real_inst, magn_orig_list, phase_list,
                         norm_fac) in enumerate(eval_seg_list):
                n_hop, _ = magn_orig_list.shape
                magn_inst_list = np.zeros_like(magn_orig_list, dtype=np.float32)
                magn_vocal_list = np.zeros_like(magn_orig_list, dtype=np.float32)
                data_feature = np.zeros(
                    (cfg.batch_size, cfg.n_hop_per_sample, n_feature, 1),
                    dtype=np.float32)
                batch_hop_list = []

                def flush_buffer():
                    pred_value, = sess.run(
                        [v_pred_clipped], feed_dict={p_feature: data_feature})
                    for i_batch, (i_batch_hop, offset_begin,
                                  offset_end) in enumerate(batch_hop_list):
                        magn_inst_list[i_batch_hop + offset_begin:i_batch_hop +
                                       offset_end, :-1] = pred_value[
                                           i_batch, offset_begin:offset_end, :, 0]
                        magn_vocal_list[i_batch_hop + offset_begin:i_batch_hop +
                                        offset_end, :-1] = pred_value[
                                            i_batch, offset_begin:offset_end, :, 1]
                    data_feature.fill(0.0)
                    batch_hop_list.clear()

                def enqueue_buffer(data, i_batch_hop, offset_begin, offset_end):
                    if len(batch_hop_list) == cfg.batch_size:
                        flush_buffer()
                    i_batch = len(batch_hop_list)
                    data_feature[i_batch, :data.shape[0], :, 0] = data
                    batch_hop_list.append(
                        (i_batch_hop, offset_begin, offset_end))

                i_hop = 0
                while i_hop + cfg.n_hop_per_sample < n_hop:
                    data = magn_orig_list[i_hop:i_hop + cfg.n_hop_per_sample, :-1]
                    if i_hop == 0:
                        enqueue_buffer(data, i_hop, 0,
                                       cfg.n_hop_per_sample * 3 // 4)
                    else:
                        enqueue_buffer(data, i_hop, cfg.n_hop_per_sample // 4,
                                       cfg.n_hop_per_sample * 3 // 4)
                    i_hop += cfg.n_hop_per_sample // 2
                data = magn_orig_list[i_hop:, :-1]
                enqueue_buffer(data, i_hop, cfg.n_hop_per_sample // 4,
                               n_hop - i_hop)
                flush_buffer()

                unit_magn = np.exp(1j * phase_list)
                fake_inst = librosa.istft(
                    (magn_inst_list * unit_magn * norm_fac).T,
                    hop_length=cfg.hop_size)
                fake_vocal = librosa.istft(
                    (magn_vocal_list * unit_magn * norm_fac).T,
                    hop_length=cfg.hop_size)
                if (fake_inst <= 1e-8).all() or (fake_vocal <= 1e-8).all():
                    sdr_list.append(-999999)
                else:
                    #saveWav("fakeinst.wav", fake_inst, cfg.work_sr)
                    #saveWav("fakevocal.wav", fake_vocal, cfg.work_sr)
                    ret_list.append(
                        cfg.pool.apply_async(bss_eval_sdr, (
                            np.array([real_inst], dtype=np.float32),
                            np.array([fake_inst], dtype=np.float32),
                        )))
                    ret_list.append(
                        cfg.pool.apply_async(bss_eval_sdr, (
                            np.array([real_vocal], dtype=np.float32),
                            np.array([fake_vocal], dtype=np.float32),
                        )))
            ret_list = [x.get()[0] for x in ret_list]
            for i_eval, sdr in enumerate(zip(ret_list[::2], ret_list[1::2])):
                mean_sdr = np.mean(sdr)
                if i_eval < cfg.n_eval:
                    sdr_list.append(mean_sdr)
                else:
                    valid_sdr_list.append(mean_sdr)

            result_list = []
            for result_type in cfg.result_format:
                if result_type == "sdr":
                    result_list.append(np.mean(sdr_list))
                elif result_type == "neg_mega_pc":
                    result_list.append(-n_param / 1_000_000.0)
                elif result_type == "neg_gflops":
                    result_list.append(-n_forward_flop / 1_000_000_000.0)
                elif result_type == "valid_sdr":
                    result_list.append(np.mean(valid_sdr_list))
                else:
                    raise ValueError("Unsupported result_type `%s`" %
                                     (result_type, ))
            print(" EVAL RESULT: t=%.2f, train_loss=%.09f, result=%r" %
                  (time.time() - t, np.mean(loss_list), result_list))
    return result_list
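# Sketch of how the returned objectives line up with cfg.result_format (assumption: the
# search driver consumes them positionally). For example, with
#   cfg.result_format = ["sdr", "neg_mega_pc", "neg_gflops", "valid_sdr"]
# eval_gene_core returns [mean SDR, -params/1e6, -forward FLOPs/1e9, mean validation SDR],
# i.e. every entry is already "larger is better". The weighted sum below is a hypothetical
# way to collapse them into one fitness scalar; the weights are illustrative only.
def _scalar_fitness_sketch(result_list, weights=(1.0, 0.1, 0.1, 0.0)):
    return sum(w * r for w, r in zip(weights, result_list))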