### Evaluation: load a trained policy and measure the mean per-frame
### reconstruction error over one pass of the data loader.
policy.load_state_dict(torch.load('exp/p' + num + '.model'))
reward_mean = 0
total_frame = 0
e = 1
iteration = 0
with torch.no_grad():
    policy.eval()
    start = time.time()
    while e < 2:
        feats, length, e = next(data_loader)
        inputs = np.asarray(feats, dtype=np.float32)
        inputs = torch.from_numpy(inputs).to(device)
        length = np.asarray(length, dtype=np.int32)
        dur = 1.0 * length / frameRate_Hz  ### unit is seconds.

        ### Get the deterministic action and split it into vocal tract / glottis parameters.
        action = policy(inputs, length.tolist(), False)
        r_calc_t = utils.trans_param(action.detach())[:, :, :24]
        r_calc_g = utils.trans_param(action.detach())[:, :, -6:]

        ### Accumulate the per-frame MSE between the features returned by calc_reward and the inputs.
        r_calc = utils.calc_reward(inputs, r_calc_t, r_calc_g, length, dur, NUM_PARAL)
        for i in range(BATCH_SIZE):
            reward_mean += F.mse_loss(torch.from_numpy(r_calc[i][:length[i]]).to(device),
                                      inputs[i, :length[i]],
                                      reduction='none').mean(dim=1).sum()
        total_frame += length.sum()

        iteration += 1
        if iteration % 10 == 0:
            logging.info('{} iter passed'.format(iteration))
            logging.info(reward_mean / total_frame)
            logging.info(time.time() - start)
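### The data_loader assumed above is a generator that yields
### (padded feature batch, per-utterance lengths, epoch counter); the evaluation loop
### stops fetching new batches once the counter reaches 2, i.e. after roughly one full pass.
### The helper below is a hypothetical sketch of such a generator (names and batching
### details are assumptions), not the repository's actual loader.
def make_data_loader(feat_list, batch_size):
    epoch = 1
    while True:
        for start in range(0, len(feat_list) - batch_size + 1, batch_size):
            batch = feat_list[start:start + batch_size]
            length = [f.shape[0] for f in batch]
            max_len = max(length)
            ### Zero-pad every utterance in the batch to the longest one.
            feats = np.zeros((batch_size, max_len, batch[0].shape[1]), dtype=np.float32)
            for i, f in enumerate(batch):
                feats[i, :f.shape[0]] = f
            yield feats, length, epoch
        epoch += 1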
feats = torch.from_numpy(feats.T[:-1]).to(device)
with torch.no_grad():
    policy.eval()
    ### Assume the duration is a multiple of 10 ms.
    ### For example, if the input feature has 101 frames, the sound is somewhere between
    ### 1000 ms and 1009 ms long, but I regard it as 1000 ms and ignore the last frame.
    inputs = feats.unsqueeze(dim=0)
    length = [inputs.shape[1]]
    length = np.asarray(length, dtype=np.int32)
    dur = 1.0 * length / frameRate_Hz  ### unit is seconds.

    ### Get action
    action = policy(inputs, length.tolist(), False)
    action = utils.trans_param(action)

    ### Store parameters
    tractParams = action[0, :length[0], :24].reshape(1, length[0], 24)
    glottisParams = action[0, :length[0], -6:].reshape(1, length[0], 6)

    ### Prepend 5 frames that ramp the vocal tract parameters up from zero toward the
    ### first predicted frame, and append 5 frames that hold the last predicted frame.
    t_param = tractParams.to('cpu')
    t_tmp = torch.zeros(1, 5, 24)
    t_tmp[:, 2, :] = t_param[:, 0, :] / 3
    t_tmp[:, 3, :] = t_param[:, 0, :] * 2 / 3
    t_tmp[:, 4, :] = t_param[:, 0, :]
    t_param = torch.cat((t_tmp, t_param), dim=1)
    for i in range(5):
        t_param = torch.cat((t_param, t_param[:, -1, :].unsqueeze(dim=1)), dim=1)

    g_param = glottisParams.to('cpu')
    g_tmp = torch.zeros(1, 5, 6)
m = mean.detach().to('cpu').numpy()
v = var.detach().to('cpu').numpy()

### Build delta and delta-delta streams for the means and variances;
### non-positive variances are clamped to a small value.
dm = librosa.feature.delta(m, width=9, order=1, axis=1)
ddm = librosa.feature.delta(m, width=9, order=2, axis=1)
dv = librosa.feature.delta(v, width=9, order=1, axis=1)
dv = 2 * v + dv
dv = np.where(dv <= 0, 1e-10, dv)
ddv = librosa.feature.delta(dv, width=9, order=1, axis=1)
ddv = 2 * dv + ddv
ddv = np.where(ddv <= 0, 1e-10, ddv)
m = np.concatenate((m, dm, ddm), axis=2)
v = np.concatenate((v, dv, ddv), axis=2)

### Generate a smoothed trajectory with MLPG and map it back to articulatory parameters.
action = G.mlpg(m[0], v[0], windows)
action = torch.from_numpy(np.asarray(action, dtype=np.float32)).to(device)
action = utils.trans_param(action).unsqueeze(dim=0)

### Store parameters
tractParams = action[0, :length[0], :24].reshape(1, length[0], 24)
glottisParams = action[0, :length[0], -6:].reshape(1, length[0], 6)

### Prepend 5 frames that ramp the vocal tract parameters up from zero toward the
### first predicted frame, and append 5 frames that hold the last predicted frame.
t_param = tractParams.to('cpu')
t_tmp = torch.zeros(1, 5, 24)
t_tmp[:, 2, :] = t_param[:, 0, :] / 3
t_tmp[:, 3, :] = t_param[:, 0, :] * 2 / 3
t_tmp[:, 4, :] = t_param[:, 0, :]
t_param = torch.cat((t_tmp, t_param), dim=1)
for i in range(5):
    t_param = torch.cat((t_param, t_param[:, -1, :].unsqueeze(dim=1)), dim=1)

g_param = glottisParams.to('cpu')
g_tmp = torch.zeros(1, 5, 6)
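### The `windows` argument passed to G.mlpg above is not defined in this snippet.
### Assuming G is nnmnkwii.paramgen, a typical static/delta/delta-delta window set looks
### like the sketch below. The concrete delta coefficients are placeholders: they should
### match however the delta streams were actually computed (here librosa.feature.delta
### with width=9).
import numpy as np
from nnmnkwii import paramgen as G

### Each entry is (left context, right context, window coefficients).
windows = [
    (0, 0, np.array([1.0])),             # static
    (1, 1, np.array([-0.5, 0.0, 0.5])),  # delta
    (1, 1, np.array([1.0, -2.0, 1.0])),  # delta-delta
]

### G.mlpg takes per-frame means and variances of the concatenated
### [static, delta, delta-delta] streams and returns the smoothed static trajectory:
### trajectory = G.mlpg(mean_frames, variance_frames, windows)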
ddv = librosa.feature.delta(dv, width=9, order=1, axis=1)
ddv = 2 * dv + ddv
ddv = np.where(ddv <= 0, 1e-10, ddv)
m = np.concatenate((m, dm, ddm), axis=2)
v = np.concatenate((v, dv, ddv), axis=2)

### Generate a smoothed trajectory with MLPG for every utterance in the batch.
action = np.zeros((BATCH_SIZE, length[0], OUT_SIZE))
for i in range(BATCH_SIZE):
    action[i] = G.mlpg(m[i], v[i], windows)
action = torch.from_numpy(np.asarray(action, dtype=np.float32)).to(device)
action = torch.clamp(action, min=0.0, max=1.0)

### Log-probability of the generated action under a Gaussian built from the network outputs.
gauss_dist = torch.distributions.normal.Normal(mean, var)
log_prob = gauss_dist.log_prob(action).sum(dim=-1).sum(dim=-1)

### Store parameters
tractParams = utils.trans_param(action)[:, :, :24]
glottisParams = utils.trans_param(action)[:, :, -6:]

### Reward calculation
reward = utils.calc_reward(inputs, tractParams, glottisParams, length, dur, NUM_PARAL)
reward_mean = 0
for i in range(BATCH_SIZE):
    reward_mean += F.mse_loss(torch.from_numpy(reward[i][:length[i]]).to(device),
                              inputs[i, :length[i]],
                              reduction='none').mean(dim=1).sum()
reward_mean /= length.sum()

### Update Q-function
q_value = q_func(action, length.tolist())
length = np.asarray(length, dtype=np.int32) - 1
length = np.where(length > AUDIO_SEGMENT, AUDIO_SEGMENT, length)
dur = 1.0 * length / frameRate_Hz  ### unit is seconds.

### Get actions: a noisy (exploratory) one and a deterministic one.
noise_act = policy(inputs, length.tolist(), True)
action = policy(inputs, length.tolist(), False)

### Every ADJUST_STEP iterations, adapt the parameter-noise scale:
### grow it while the noisy action stays within DELTA of the deterministic action,
### shrink it otherwise.
if (iteration + 1) % ADJUST_STEP == 0:
    if F.mse_loss(noise_act, action) < DELTA**2:
        policy.fc.sigma_init *= 1.01
    else:
        policy.fc.sigma_init /= 1.01

### Store parameters
tractParams = utils.trans_param(noise_act)[:, :, :24]
glottisParams = utils.trans_param(noise_act)[:, :, -6:]
r_calc_t = utils.trans_param(action.detach())[:, :, :24]
r_calc_g = utils.trans_param(action.detach())[:, :, -6:]

### Reward calculation
reward = utils.calc_reward(inputs, tractParams, glottisParams, length, dur, NUM_PARAL)
r_calc = utils.calc_reward(inputs, r_calc_t, r_calc_g, length, dur, NUM_PARAL)
reward_mean = 0
for i in range(BATCH_SIZE):
    reward_mean += F.mse_loss(torch.from_numpy(r_calc[i][:length[i]]).to(device),
                              inputs[i, :length[i]],
                              reduction='none').mean(dim=1).sum()