# Synthesize one sentence and save its attention / mel / magnitude images and
# its waveform under samples/samples_bea_sleepiness_3k.
# NOTE(review): `i` is not bound in this excerpt; it is expected to come from
# enclosing code outside the visible source (presumably a per-sentence loop
# over SENTENCES) -- confirm.
# NOTE(review): local import because the file's import section is not visible
# here; move it to the top of the file if `os` is not imported there already.
import os

sentences = [SENTENCES[i]]
max_N = len(SENTENCES[i])
L = torch.from_numpy(get_test_data(sentences, max_N))
# All-zero tensor of shape (1, hp.n_mels, 1); it is the initial decoder input
# and is prepended along the last axis to the text2mel output after each step.
zeros = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
Y = zeros
A = None
# Look the end-of-sequence id up once instead of on every decoding step.
eos_id = vocab.index('E')

# Decode step by step, at most hp.max_T steps.
for _step in tqdm(range(hp.max_T)):
    _, Y_t, A = text2mel(L, Y, monotonic_attention=True)
    Y = torch.cat((zeros, Y_t), -1)
    # Index of the largest weight in the last column of A; it is used as a
    # position into L to see which input symbol is currently attended.
    attention = torch.argmax(A[0, :, -1]).item()
    if L[0, attention] == eos_id:  # EOS
        break

# Only the second output of ssrn is used (saved as the "mag" image and wav).
_, Z = ssrn(Y)

Y = Y.cpu().detach().numpy()
A = A.cpu().detach().numpy()
Z = Z.cpu().detach().numpy()

# The save helpers are not visible here; presumably they do not create missing
# directories, so make sure the target exists (as the sibling variant of this
# routine that writes under hp.synthesize_samples_dir already does).
output_dir = 'samples/samples_bea_sleepiness_3k'
os.makedirs(output_dir, exist_ok=True)

# Files are numbered from 1; the resulting paths are unchanged.
prefix = '%s/%d' % (output_dir, i + 1)
save_to_png(prefix + '-att.png', A[0, :, :])
save_to_png(prefix + '-mel.png', Y[0, :, :])
save_to_png(prefix + '-mag.png', Z[0, :, :])
save_to_wav(Z[0, :, :].T, prefix + '-wav.wav')
# NOTE(review): this exit call looks like the tail of a branch that starts
# above the visible excerpt; its original indentation could not be recovered
# here -- confirm it still sits inside that branch.
sys.exit(1)

# NOTE(review): local import because the file's import section is not visible
# here; move it to the top of the file if `os` is not imported there already.
import os

# Synthesize the sentences one by one: per the original author's note there is
# a batch processing bug, so every sentence is run as a batch of one.
output_dir = 'samples'
# The save helpers are not visible here; presumably they do not create missing
# directories, so make sure the target exists before the first save.
os.makedirs(output_dir, exist_ok=True)
# The end-of-sequence id never changes, so look it up once.
eos_id = vocab.index('E')

for i, sentence in enumerate(SENTENCES):
    sentences = [sentence]
    max_N = len(sentence)
    L = torch.from_numpy(get_test_data(sentences, max_N))
    # All-zero tensor of shape (1, hp.n_mels, 1); it is the initial decoder
    # input and is prepended along the last axis to the text2mel output after
    # each step.
    zeros = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
    Y = zeros
    A = None

    # Decode step by step, at most hp.max_T steps.
    for _step in tqdm(range(hp.max_T)):
        _, Y_t, A = text2mel(L, Y, monotonic_attention=True)
        Y = torch.cat((zeros, Y_t), -1)
        # Index of the largest weight in the last column of A; it is used as
        # a position into L to see which input symbol is currently attended.
        attention = torch.argmax(A[0, :, -1]).item()
        if L[0, attention] == eos_id:  # EOS
            break

    # Only the second output of ssrn is used (saved as "mag" image and wav).
    _, Z = ssrn(Y)

    Y = Y.cpu().detach().numpy()
    A = A.cpu().detach().numpy()
    Z = Z.cpu().detach().numpy()

    # Files are numbered from 1; the resulting paths are unchanged.
    prefix = '%s/%d' % (output_dir, i + 1)
    save_to_png(prefix + '-att.png', A[0, :, :])
    save_to_png(prefix + '-mel.png', Y[0, :, :])
    save_to_png(prefix + '-mag.png', Z[0, :, :])
    save_to_wav(Z[0, :, :].T, prefix + '-wav.wav')
# Synthesize one sentence and save its attention / mel / magnitude images and
# its waveform in a sub-directory of hp.synthesize_samples_dir named after the
# two checkpoint step strings.
# NOTE(review): `i` and `sentences` are not bound in this excerpt; they are
# expected to come from enclosing code outside the visible source (presumably
# a per-sentence loop over SENTENCES) -- confirm.
max_N = len(SENTENCES[i])
L = torch.from_numpy(get_test_data(sentences, max_N))
# All-zero tensor of shape (1, hp.n_mels, 1); it is the initial decoder input
# and is prepended along the last axis to the text2mel output after each step.
zeros = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
Y = zeros
A = None
# Look the end-of-sequence id up once instead of on every decoding step.
eos_id = vocab.index('E')

# Decode step by step, at most hp.max_T steps.
for _step in tqdm(range(hp.max_T)):
    _, Y_t, A = text2mel(L, Y, monotonic_attention=True)
    Y = torch.cat((zeros, Y_t), -1)
    # Index of the largest weight in the last column of A; it is used as a
    # position into L to see which input symbol is currently attended.
    attention = torch.argmax(A[0, :, -1]).item()
    if L[0, attention] == eos_id:  # EOS
        break

# Only the second output of ssrn is used (saved as the "mag" image and wav).
_, Z = ssrn(Y)

Y = Y.cpu().detach().numpy()
A = A.cpu().detach().numpy()
Z = Z.cpu().detach().numpy()

# Output directory: <hp.synthesize_samples_dir>/<t2mel step>_<ssrn step>.
save_dir = hp.synthesize_samples_dir
out_dir = save_dir + '/' + t2mel_step_str + "_" + ssrn_step_str
os.makedirs(out_dir, exist_ok=True)

# Files are numbered from 1; the resulting paths are unchanged.
prefix = out_dir + '/%d' % (i + 1)
save_to_png(prefix + '-att.png', A[0, :, :])
save_to_png(prefix + '-mel.png', Y[0, :, :])
save_to_png(prefix + '-mag.png', Z[0, :, :])
save_to_wav(Z[0, :, :].T, prefix + '-wav.wav')