def _init_geometry(self, batch_win_size):
    """
    Compute and cache the lengths and trim offsets for one batch window.

    A decoder output window of `batch_win_size` positions is requested, and
    the ranges returned by vconv.input_range / vconv.output_range for the
    encoder and decoder vconv chains are used to set:

    self.enc_in_len            - sub-range length of the range obtained from
                                 the parent of encoder.vc['beg']
    self.enc_in_mel_len        - sub-range length of the range obtained from
                                 that vconv's child
    self.embed_len             - sub-range length at encoder.vc['end']
    self.dec_in_len            - sub-range length at decoder.vc['beg_grcc']
    self.trim_dec_in           - decoder input bounds, relative to the start
                                 of the encoder input
    self.decoder.trim_ups_out  - decoder input bounds, relative to the start
                                 of the decoder.vc['last_upsample'] output
    self.trim_dec_out          - decoder output bounds, relative to the start
                                 of the decoder input

    The three trim_* values are torch.long tensors of two elements.
    """
    def offsets(inner, outer):
        # Bounds of `inner`, measured from where `outer` begins.
        return torch.tensor(
            [inner.sub[0] - outer.sub[0], inner.sub[1] - outer.sub[0]],
            dtype=torch.long)

    enc_vcs = self.encoder.vc
    dec_vcs = self.decoder.vc

    first_vc = enc_vcs['beg'].parent
    grcc_first = dec_vcs['beg_grcc']
    grcc_last = dec_vcs['end_grcc']
    ups_last = dec_vcs['last_upsample']
    enc_last = enc_vcs['end']

    # Requested decoder output: the first batch_win_size positions of a
    # nominal 100000-long full range.
    dec_out = vconv.GridRange((0, 100000), (0, batch_win_size), 1)

    dec_in = vconv.input_range(grcc_first, grcc_last, dec_out)
    enc_in = vconv.input_range(first_vc, grcc_last, dec_out)
    mel_in = vconv.input_range(first_vc.child, grcc_last, dec_out)
    enc_out = vconv.output_range(first_vc, enc_last, enc_in)
    ups_out = vconv.output_range(first_vc, ups_last, enc_in)

    # Lengths
    self.enc_in_len = enc_in.sub_length()
    self.enc_in_mel_len = mel_in.sub_length()
    self.embed_len = enc_out.sub_length()
    self.dec_in_len = dec_in.sub_length()

    # Trim offsets
    self.trim_dec_in = offsets(dec_in, enc_in)
    self.decoder.trim_ups_out = offsets(dec_in, ups_out)
    self.trim_dec_out = offsets(dec_out, dec_in)
def _init_geometry(self, batch_win_size):
    """
    Set up the lengths and trimming vectors required to produce
    batch_win_size timesteps of output.

    self.enc_in_len            - encoder input length (timesteps)
    self.enc_in_mel_len        - length of the range computed from the child
                                 of the first vconv (used by jitter_index)
    self.embed_len             - length of the encoder output range
    self.dec_in_len            - decoder input length (timesteps)
    self.trim_dec_in           - trims wav_enc_input to wav_dec_input
    self.decoder.trim_ups_out  - trims decoder lc_dense before use
    self.trim_dec_out          - trims wav_dec_input to wav_dec_output

    Why trimming is necessary: striding geometry prevents output tensors
    from being produced in single-increment sizes, so in some cases they
    are over-produced and then cut back.
    """
    def relative(span, origin):
        # Both bounds of `span`, shifted so `origin` begins at zero.
        return torch.tensor(
            [bound - origin.sub[0] for bound in (span.sub[0], span.sub[1])],
            dtype=torch.long)

    # Endpoints of the vconv chains involved
    mfcc_vc = self.encoder.vc['beg'].parent
    enc_end = self.encoder.vc['end']
    ups_end = self.decoder.vc['last_upsample']
    grcc_beg = self.decoder.vc['beg_grcc']
    grcc_end = self.decoder.vc['end_grcc']

    # Ranges, starting from the requested decoder output window
    dec_out = vconv.GridRange((0, 100000), (0, batch_win_size), 1)
    dec_in = vconv.input_range(grcc_beg, grcc_end, dec_out)
    enc_in = vconv.input_range(mfcc_vc, grcc_end, dec_out)
    mel_in = vconv.input_range(mfcc_vc.child, grcc_end, dec_out)
    enc_out = vconv.output_range(mfcc_vc, enc_end, enc_in)
    ups_out = vconv.output_range(mfcc_vc, ups_end, enc_in)

    self.enc_in_len = enc_in.sub_length()

    # consumed by jitter_index
    self.enc_in_mel_len = mel_in.sub_length()
    self.embed_len = enc_out.sub_length()

    # determines the size of wav_dec_in
    self.dec_in_len = dec_in.sub_length()

    # wav_enc_input -> wav_dec_input
    self.trim_dec_in = relative(dec_in, enc_in)

    # wavenet uses this to trim the upsampled local conditioning tensor
    self.decoder.trim_ups_out = relative(dec_in, ups_out)

    # wav_dec_input -> wav_dec_output
    self.trim_dec_out = relative(dec_out, dec_in)
def autoenc_test(vcs, in_len, slice_beg):
    """
    Print the grid ranges and tensor slices involved in producing a
    100-position output window through the 'MFCC' .. 'GRCC_1,9' chain.

    vcs:        mapping from layer name to vconv object; the keys 'MFCC',
                'Upsampling_3', 'GRCC_0,0' and 'GRCC_1,9' are read
    in_len:     length of the full input range
    slice_beg:  start of the requested output window, which spans
                [slice_beg, slice_beg + 100)

    Nothing is returned; all results are written to stdout.
    """
    mfcc_vc = vcs['MFCC']
    ups_vc = vcs['Upsampling_3']
    grcc_first = vcs['GRCC_0,0']
    grcc_last = vcs['GRCC_1,9']

    # (first, last) vconv pairs for each stretch of the model
    enc_span = (mfcc_vc, ups_vc)
    dec_span = (grcc_first, grcc_last)
    mfcc_span = (mfcc_vc, mfcc_vc)
    whole_span = (mfcc_vc, grcc_last)

    # Ranges over the complete input
    full_in = vconv.GridRange((0, in_len), (0, in_len), 1)
    full_mfcc = vconv.output_range(*mfcc_span, full_in)
    full_out = vconv.output_range(*whole_span, full_in)

    # Requested output window, then the ranges it implies upstream
    out_req = vconv.GridRange(full_out.full, (slice_beg, slice_beg + 100), 1)
    mid_req = vconv.input_range(*dec_span, out_req)
    in_req = vconv.input_range(*enc_span, mid_req)

    # The actual input is taken to be exactly the requested input
    in_act = in_req
    mfcc_act = vconv.output_range(*mfcc_span, in_act)
    mid_act = vconv.output_range(*enc_span, in_act)

    # wav -> wav_mid
    wav_mid_sl = vconv.tensor_slice(in_act, mid_req.sub)
    # lcond -> lcond_sl
    lcond_sl = vconv.tensor_slice(mid_act, mid_req.sub)
    # wav -> wav_out
    # NOTE(review): the earlier comment here said "+1 since it is predicting
    # the next step", but no shift is applied to this slice - confirm intent.
    wav_out_sl = vconv.tensor_slice(in_act, out_req.sub)
    mfcc_in_sl = vconv.tensor_slice(full_mfcc, mfcc_act.sub)

    range_report = (
        ('full_in', full_in),
        ('full_mfcc', full_mfcc),
        ('in_req', in_req),
        ('mfcc_req', mfcc_act),
        ('mid_req', mid_req),
        ('mid_act', mid_act),
        ('out_req', out_req),
        ('full_out', full_out),
    )
    for label, grid_range in range_report:
        print('{:10}: {}'.format(label, grid_range))

    slice_report = (
        ('wav_mid_sl: {} len: {}', wav_mid_sl),
        ('mfcc_in_sl: {} len: {}', mfcc_in_sl),
        ('lcond_sl: {} len: {}', lcond_sl),
        ('wav_out_sl: {} len: {}', wav_out_sl),
    )
    for template, bounds in slice_report:
        print(template.format(bounds, bounds[1] - bounds[0]))
def usage_test(vc_range, winsize):
    """
    Tally the input tensor slices needed for each single-position output.

    For every output position b in [0, winsize), a one-element output
    window (b, b + 1) is requested, the corresponding input range is
    computed with vconv.input_range, and the tensor slice of that range is
    counted.

    vc_range:  (first, last) pair of vconv objects, unpacked into
               vconv.input_range
    winsize:   number of output positions to examine

    Prints the tally and also returns it (a collections.Counter keyed by
    slice), so callers can inspect the result programmatically.
    """
    slice_counts = Counter()
    for pos in range(winsize):
        out_range = vconv.GridRange((0, 100000), (pos, pos + 1), 1)
        # Named in_range / in_slice so the builtins `input` and `slice`
        # are not shadowed.
        in_range = vconv.input_range(*vc_range, out_range)
        in_slice = vconv.tensor_slice(in_range, in_range.sub)
        slice_counts[in_slice] += 1
    print(slice_counts)
    return slice_counts
def phase_test(vc_range, n_sub_win, winsize):
    """
    Tally the input lengths required as an output window slides forward.

    For every start offset b in [0, n_sub_win), an output window
    (b, b + winsize) is requested, the corresponding input range is
    computed with vconv.input_range, and its sub_length() is counted.

    vc_range:   (first, last) pair of vconv objects, unpacked into
                vconv.input_range
    n_sub_win:  number of consecutive start offsets to try
    winsize:    width of each requested output window

    Prints the tally and also returns it (a collections.Counter keyed by
    input length), so callers can inspect the result programmatically.
    """
    length_counts = Counter()
    for start in range(n_sub_win):
        out_range = vconv.GridRange((0, 90000), (start, start + winsize), 1)
        # Named in_range so the builtin `input` is not shadowed.
        in_range = vconv.input_range(*vc_range, out_range)
        length_counts[in_range.sub_length()] += 1
    print(length_counts)
    return length_counts
def _init_geometry(self, batch_win_size):
    """
    Compute the offsets used to trim the decoder input to its output.

    Requests an output window of batch_win_size positions, obtains the
    matching input range across wavenet.vc['beg_grcc'] .. ['end_grcc'],
    and stores in self.trim_dec_out a two-element torch.long tensor
    holding the output bounds relative to the start of that input range.
    """
    grcc_first = self.wavenet.vc['beg_grcc']
    grcc_last = self.wavenet.vc['end_grcc']

    dec_out = vconv.GridRange((0, 100000), (0, batch_win_size), 1)
    dec_in = vconv.input_range(grcc_first, grcc_last, dec_out)

    in_start = dec_in.sub[0]
    out_beg, out_end = dec_out.sub[0], dec_out.sub[1]
    self.trim_dec_out = torch.tensor(
        [out_beg - in_start, out_end - in_start], dtype=torch.long)
def _init_geometry(self, batch_win_size):
    """
    Initializes the window geometry for a given batch window size:

    self.enc_in_len            - timesteps of encoder input needed to
                                 produce batch_win_size decoder output
                                 timesteps
    self.enc_in_mel_len        - length of the range computed from the child
                                 of the first vconv
    self.embed_len             - length of the encoder output range
    self.dec_in_len            - length of the decoder input range
    self.decoder.trim_ups_out  - offsets for trimming the upsampler output
                                 tensor
    self.trim_dec_out          - offsets for trimming the decoder output
    self.trim_dec_in           - offsets for trimming the decoder input

    Trimming is required because striding geometry does not allow output
    tensors to be produced in single-increment sizes; some tensors are
    therefore over-produced and cut down afterwards.
    """
    def _span_within(target, container):
        # [begin, end] of target.sub, re-based on the start of container.sub
        begin = target.sub[0] - container.sub[0]
        end = target.sub[1] - container.sub[0]
        return torch.tensor([begin, end], dtype=torch.long)

    encoder_vc, decoder_vc = self.encoder.vc, self.decoder.vc
    head_vc = encoder_vc['beg'].parent
    grcc_head = decoder_vc['beg_grcc']
    grcc_tail = decoder_vc['end_grcc']
    ups_tail = decoder_vc['last_upsample']
    enc_tail = encoder_vc['end']

    # Start from the desired decoder output and work back to the inputs
    wanted_out = vconv.GridRange((0, 100000), (0, batch_win_size), 1)
    dec_input = vconv.input_range(grcc_head, grcc_tail, wanted_out)
    enc_input = vconv.input_range(head_vc, grcc_tail, wanted_out)
    mel_input = vconv.input_range(head_vc.child, grcc_tail, wanted_out)

    # ...then forward again from the encoder input
    enc_output = vconv.output_range(head_vc, enc_tail, enc_input)
    ups_output = vconv.output_range(head_vc, ups_tail, enc_input)

    self.enc_in_len = enc_input.sub_length()
    self.enc_in_mel_len = mel_input.sub_length()
    self.embed_len = enc_output.sub_length()
    self.dec_in_len = dec_input.sub_length()

    self.trim_dec_in = _span_within(dec_input, enc_input)
    self.decoder.trim_ups_out = _span_within(dec_input, ups_output)
    self.trim_dec_out = _span_within(wanted_out, dec_input)
def downsample_test(vc, x):
    """
    Round-trip check for a single vconv: x -> y -> xn -> yt -> xt.

    The range x is pushed through vconv.output_range and
    vconv.input_range twice in alternation. The second and fourth
    results (xn and xt) are then compared.

    Returns:
      Result.NO_OUTPUT  if either output_range call raises RuntimeError
      Result.NO_INPUT   if either input_range call raises RuntimeError
      Result.UNEQUAL    if xn != xt
      Result.SUCCESS    otherwise
    """
    # Each stage is (function to apply, result to report if it raises)
    one_round = (
        (vconv.output_range, Result.NO_OUTPUT),
        (vconv.input_range, Result.NO_INPUT),
    )
    trail = [x]
    for step, failure in one_round * 2:
        try:
            trail.append(step(vc, vc, trail[-1]))
        except RuntimeError:
            return failure

    # trail is [x, y, xn, yt, xt]
    xn, xt = trail[2], trail[4]
    return Result.UNEQUAL if xn != xt else Result.SUCCESS
def same_or_upsample_test(vc, x):
    """
    Single round-trip check for one vconv: x -> y -> xn.

    Computes the output range of x through vc, then the input range of
    that output, and compares the result against the original x.

    Returns:
      Result.NO_OUTPUT  if vconv.output_range raises RuntimeError
      Result.NO_INPUT   if vconv.input_range raises RuntimeError
      Result.UNEQUAL    if the recovered input differs from x
      Result.SUCCESS    otherwise
    """
    try:
        forward = vconv.output_range(vc, vc, x)
    except RuntimeError:
        return Result.NO_OUTPUT

    try:
        recovered = vconv.input_range(vc, vc, forward)
    except RuntimeError:
        return Result.NO_INPUT

    return Result.UNEQUAL if recovered != x else Result.SUCCESS
if vc.stride_ratio.numerator > 1: res = downsample_test(vc, x) else: res = same_or_upsample_test(vc, x) results[res] += 1 if c > 0 and c % t.report_freq == 0: print(results) c += 1 print('Finished') print('Results: {}'.format(results)) x = vconv.GridRange((0, 250000), (0, 250000), 1) y = vconv.output_range(vcs['MFCC'], vcs['GRCC_1,9'], x) xi = vconv.input_range(vcs['MFCC'], vcs['GRCC_1,9'], y) #print('x0: {}'.format(x)) #print('y0: {}'.format(y)) #print('xi: {}'.format(xi)) def autoenc_test(vcs, in_len, slice_beg): enc = vcs['MFCC'], vcs['Upsampling_3'] dec = vcs['GRCC_0,0'], vcs['GRCC_1,9'] mfcc = vcs['MFCC'], vcs['MFCC'] autoenc = vcs['MFCC'], vcs['GRCC_1,9'] full_in = vconv.GridRange((0, in_len), (0, in_len), 1) full_mfcc = vconv.output_range(*mfcc, full_in) full_out = vconv.output_range(*autoenc, full_in)