import math
import os
import logging

import numpy as np
import torch
from torch.autograd import Variable

# NOTE: read_hdf5, shape_hdf5, pad_list, extend_time, _batch_f0, and
# _dilated_factor are project-specific helpers; import them from the
# repository's utility module.


def decode_generator(feat_list,
                     upsampling_factor=120,
                     string_path='/feat_mceplf0cap',
                     batch_size=1,
                     excit_dim=0):
    """DECODE BATCH GENERATOR

    Args:
        wav_list (str): list including wav files
        batch_size (int): batch size in decoding
        upsampling_factor (int): upsampling factor

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # sort with the feature length
        shape_list = [shape_hdf5(f, string_path)[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_feat_lists = np.array_split(feat_list, n_batch)
        batch_feat_lists = [f.tolist() for f in batch_feat_lists]

        for batch_feat_list in batch_feat_lists:
            batch_feat = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_feat_list:
                # load acoustic features (prepend excitation dims for mel features)
                if 'mel' in string_path:
                    if excit_dim > 0:
                        feat = np.c_[
                            read_hdf5(featfile, '/feat_mceplf0cap')[:, :excit_dim],
                            read_hdf5(featfile, string_path)]
                    else:
                        feat = read_hdf5(featfile, string_path)
                else:
                    feat = read_hdf5(featfile, string_path)

                # append to list
                batch_feat += [feat]
                n_samples_list += [feat.shape[0] * upsampling_factor]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to ndarray
            batch_feat = pad_list(batch_feat)

            # convert to torch variable
            batch_feat = torch.FloatTensor(batch_feat)
            if torch.cuda.is_available():
                batch_feat = batch_feat.cuda()

            yield feat_ids, (batch_feat, n_samples_list)
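

# A hypothetical driver loop for the generator above; the `model` object and
# its `generate` method are assumptions for illustration, not part of the
# original code.
def _decode_batches(model, feat_list):
    generator = decode_generator(feat_list,
                                 upsampling_factor=120,
                                 string_path='/feat_mceplf0cap',
                                 batch_size=1)
    with torch.no_grad():
        for feat_ids, (batch_feat, n_samples_list) in generator:
            # assumed model interface: synthesize waveforms from padded features
            samples = model.generate(batch_feat)
            for feat_id, n_samples in zip(feat_ids, n_samples_list):
                logging.info("decoded %s (%d samples)", feat_id, n_samples)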


# Example #2
def get_max_frame(l, feat_list, wav_list, cpu, spk_list, spk_dict):
    """Count the number of frames of each utterance per speaker.

    Results are accumulated into the shared dict `spk_dict` under the lock `l`.
    """
    tmp_spk_dict = {}
    for spk in spk_list:
        tmp_spk_dict[spk] = {}
    for feat, wav in zip(feat_list, wav_list):
        n_frame = shape_hdf5(feat, '/f0_range')[0]
        spk = os.path.basename(os.path.dirname(feat))
        tmp_spk_dict[spk][feat + "@" + wav] = n_frame
        logging.info(f'{cpu} {spk} {feat} {wav} {n_frame}')
    l.acquire()
    try:
        for spk in spk_list:
            if bool(tmp_spk_dict[spk]):
                spk_dict[spk].append(tmp_spk_dict[spk])
    finally:
        l.release()
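

# A minimal sketch of how get_max_frame might be driven with multiprocessing;
# the Manager-based shared dict, the lock, and the per-CPU list splitting shown
# here are assumptions for illustration, not part of the original code.
def _count_frames_parallel(feat_list, wav_list, spk_list, n_jobs=4):
    import multiprocessing as mp

    manager = mp.Manager()
    spk_dict = manager.dict({spk: manager.list() for spk in spk_list})
    lock = mp.Lock()
    feat_chunks = np.array_split(feat_list, n_jobs)
    wav_chunks = np.array_split(wav_list, n_jobs)
    processes = []
    for cpu, (feats, wavs) in enumerate(zip(feat_chunks, wav_chunks)):
        p = mp.Process(target=get_max_frame,
                       args=(lock, feats.tolist(), wavs.tolist(),
                             cpu, spk_list, spk_dict))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    # convert the managed proxies back to plain Python objects
    return {spk: list(spk_dict[spk]) for spk in spk_list}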


# Example #3
def decode_generator(feat_list,
                     batch_size=32,
                     wav_transform=None,
                     feat_transform=None,
                     use_speaker_code=False,
                     upsampling_factor=0):
    """DECODE BATCH GENERATOR

    Args:
        featdir (str): directory including feat files
        batch_size (int): batch size in decoding
        wav_transform (func): preprocessing function for waveform
        feat_transform (func): preprocessing function for aux feats
        use_speaker_code (bool): whether to use speaker code
        upsampling_factor (int): upsampling factor

    Return:
        (object): generator instance
    """
    # for sample-by-sample generation
    if batch_size == 1:
        for featfile in feat_list:
            x = np.zeros((1))
            if upsampling_factor == 0:
                h = read_hdf5(featfile, "/feat")
            else:
                h = read_hdf5(featfile, "/feat_org")
            if use_speaker_code:
                sc = read_hdf5(featfile, "/speaker_code")
                sc = np.tile(sc, [h.shape[0], 1])
                h = np.concatenate([h, sc], axis=1)

            # perform pre-processing
            if wav_transform is not None:
                x = wav_transform(x)
            if feat_transform is not None:
                h = feat_transform(h)

            # convert to torch variable
            x = Variable(torch.from_numpy(x).long(), volatile=True)
            h = Variable(torch.from_numpy(h).float(), volatile=True)
            if torch.cuda.is_available():
                x = x.cuda()
                h = h.cuda()
            x = x.unsqueeze(0)  # 1 => 1 x 1
            h = h.transpose(0, 1).unsqueeze(0)  # T x C => 1 x C x T

            # get target length and file id
            if upsampling_factor == 0:
                n_samples = h.size(2) - 1
            else:
                n_samples = h.size(2) * upsampling_factor - 1
            feat_id = os.path.basename(featfile).replace(".h5", "")

            yield feat_id, (x, h, n_samples)

    # for batch generation
    else:
        # sort with the feature length
        if upsampling_factor == 0:
            shape_list = [shape_hdf5(f, "/feat")[0] for f in feat_list]
        else:
            shape_list = [shape_hdf5(f, "/feat_org")[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_lists = np.array_split(feat_list, n_batch)
        batch_lists = [f.tolist() for f in batch_lists]

        for batch_list in batch_lists:
            batch_x = []
            batch_h = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_list:
                # make seed waveform and load aux feature
                x = np.zeros((1))
                if upsampling_factor == 0:
                    h = read_hdf5(featfile, "/feat")
                else:
                    h = read_hdf5(featfile, "/feat_org")
                if use_speaker_code:
                    sc = read_hdf5(featfile, "/speaker_code")
                    sc = np.tile(sc, [h.shape[0], 1])
                    h = np.concatenate([h, sc], axis=1)

                # perform pre-processing
                if wav_transform is not None:
                    x = wav_transform(x)
                if feat_transform is not None:
                    h = feat_transform(h)

                # append to list
                batch_x += [x]
                batch_h += [h]
                if upsampling_factor == 0:
                    n_samples_list += [h.shape[0] - 1]
                else:
                    n_samples_list += [h.shape[0] * upsampling_factor - 1]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to ndarray
            batch_x = np.stack(batch_x, axis=0)
            batch_h = pad_list(batch_h)

            # convert to torch variable
            batch_x = Variable(torch.from_numpy(batch_x).long(), volatile=True)
            batch_h = Variable(torch.from_numpy(batch_h).float(),
                               volatile=True).transpose(1, 2)
            if torch.cuda.is_available():
                batch_x = batch_x.cuda()
                batch_h = batch_h.cuda()

            yield feat_ids, (batch_x, batch_h, n_samples_list)
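

# A hypothetical driver for the generator above (Example #3); the `model`
# object and its `batch_fast_generate` method are illustrative assumptions.
def _decode_with_wavenet(model, feat_list, upsampling_factor=80):
    generator = decode_generator(feat_list,
                                 batch_size=32,
                                 upsampling_factor=upsampling_factor)
    for feat_ids, (batch_x, batch_h, n_samples_list) in generator:
        # assumed model interface: autoregressive generation conditioned on
        # the seed samples `batch_x` and aux features `batch_h`
        samples_list = model.batch_fast_generate(batch_x, batch_h, n_samples_list)
        for feat_id, samples in zip(feat_ids, samples_list):
            logging.info("decoded %s (%d samples)", feat_id, len(samples))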


# Example #4
def decode_generator(feat_list,
                     fs,
                     wav_transform=None,
                     feat_transform=None,
                     feature_type="world",
                     feat_ext=".h5",
                     dense_factor=8,
                     batch_size=32,
                     upsampling_factor=80,
                     f0_factor=1.0,
                     f0_dim_index=1,
                     extra_memory=False):
    """DECODE BATCH GENERATOR
    Args:
        feat_list (str): list of feat files
        fs (int): sampling rate
        wav_transform (func): preprocessing function for waveform
        feat_transform (func): preprocessing function for aux feats
        feature_type (str): feature type
        feat_ext (str): feature filename extension
        dense_factor (int): the number of taps in one cycle
        batch_size (int): batch size in decoding
        upsampling_factor (int): upsampling factor
        f0_factor (float): the ratio of scaled f0
        f0_dim_index (int): the dimension index of the f0 feature
        extra_memory(bool): processing dilated factor in tensor format or not
                * tensor mode will accelerate the decoding but consume more memory
    Return:
        (object): generator instance
    """
    # sort with the feature length
    shape_list = [shape_hdf5(f, "/" + feature_type)[0] for f in feat_list]
    idx = np.argsort(shape_list)
    feat_list = [feat_list[i] for i in idx]
    
    # divide into batch list
    n_batch = math.ceil(len(feat_list) / batch_size)
    batch_lists = np.array_split(feat_list, n_batch)
    batch_lists = [f.tolist() for f in batch_lists]
    
    for batch_list in batch_lists:
        batch_x = []
        batch_h = []
        batch_d = []
        feat_ids = []
        n_samples_list = []
        for featfile in batch_list:
            # make seed waveform and load aux feature
            x = np.zeros((1))
            h = read_hdf5(featfile, "/" + feature_type)
            if f0_factor != 1.0:
                h[:, f0_dim_index] = h[:, f0_dim_index] * f0_factor
            # compute the dilated factor regardless of f0 scaling
            d = _dilated_factor(_batch_f0(h), fs, dense_factor)
            d = extend_time(np.expand_dims(d, -1), upsampling_factor)  # T x 1
    
            # perform pre-processing
            if wav_transform is not None:
                x = wav_transform(x)
            if feat_transform is not None:
                h = feat_transform(h)
    
            # append to list
            batch_x += [x]
            batch_h += [h]
            batch_d += [d]
            feat_ids += [os.path.basename(featfile).replace(feat_ext, "")]
            n_samples_list += [h.shape[0] * upsampling_factor - 1]
    
        # convert list to ndarray
        batch_x = np.stack(batch_x, axis=0)
        batch_h = pad_list(batch_h)
        batch_d = pad_list(batch_d)
        # convert to torch variable
        batch_x = torch.from_numpy(batch_x).long()
        batch_h = torch.from_numpy(batch_h).float().transpose(1, 2)
        if extra_memory:
            batch_d = torch.from_numpy(batch_d).float().squeeze(-1)
        else:
            batch_d = batch_d.squeeze(-1)
        
        # send to cuda
        if torch.cuda.is_available():
            batch_x = batch_x.cuda()
            batch_h = batch_h.cuda()
            if extra_memory:
                batch_d = batch_d.cuda()
        
        yield feat_ids, batch_x, batch_h, n_samples_list, batch_d
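

# A hypothetical decoding loop for the generator above (Example #4); the
# `model` object and its forward signature taking the pitch-dependent dilated
# factors `batch_d` are assumptions for illustration.
def _decode_qp_batches(model, feat_list, fs=22050):
    generator = decode_generator(feat_list, fs,
                                 dense_factor=8,
                                 batch_size=32,
                                 upsampling_factor=80)
    with torch.no_grad():
        for feat_ids, batch_x, batch_h, n_samples_list, batch_d in generator:
            # assumed interface: generate waveforms conditioned on aux features
            # and the per-sample dilated factors
            samples_list = model.generate(batch_x, batch_h, batch_d, n_samples_list)
            for feat_id, n_samples in zip(feat_ids, n_samples_list):
                logging.info("decoded %s (%d samples)", feat_id, n_samples)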


# Example #5
def decode_generator(feat_list,
                     upsampling_factor=120,
                     string_path='/feat_mceplf0cap',
                     batch_size=1):
    """DECODE BATCH GENERATOR

    Args:
        wav_list (str): list including wav files
        batch_size (int): batch size in decoding
        upsampling_factor (int): upsampling factor

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # sort with the feature length
        shape_list = [shape_hdf5(f, string_path)[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_feat_lists = np.array_split(feat_list, n_batch)
        batch_feat_lists = [f.tolist() for f in batch_feat_lists]

        for batch_feat_list in batch_feat_lists:
            batch_feat = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_feat_list:
                # load acoustic features
                feat = read_hdf5(featfile, string_path)
                logging.info(featfile)

                # append to list
                batch_feat += [feat]
                n_samples_list += [feat.shape[0] * upsampling_factor]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to ndarray
            batch_feat = pad_list(batch_feat)

            # convert to torch variable
            batch_feat = torch.FloatTensor(batch_feat)
            if torch.cuda.is_available():
                batch_feat = batch_feat.cuda()

            yield feat_ids, (batch_feat, n_samples_list)


# Example #6
def decode_generator(feat_list,
                     batch_size=7,
                     wav_transform=None,
                     string_path='/feat_org_lf0',
                     spk_trg=None,
                     min_idx=None,
                     upsampling_factor=0):
    """DECODE BATCH GENERATOR

    Args:
        featdir (str): directory including feat files
        batch_size (int): batch size in decoding
        wav_transform (func): preprocessing function for waveform
        upsampling_factor (int): upsampling factor

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # build the HDF5 path of the converted features if a target speaker is given
        if spk_trg is not None:
            if min_idx is not None:
                string_path = '/feat_cvmcep_cycvae-' + str(min_idx) + '-' + spk_trg
            else:
                string_path = '/feat_cvmcep_cycvae-' + spk_trg

        # sort with the feature length
        shape_list = [shape_hdf5(f, string_path)[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_lists = np.array_split(feat_list, n_batch)
        batch_lists = [f.tolist() for f in batch_lists]

        for batch_list in batch_lists:
            batch_x = []
            batch_h = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_list:
                # make seed waveform and load aux feature
                x = np.zeros((1))
                h = read_hdf5(featfile, string_path)

                # perform pre-processing
                if wav_transform is not None:
                    x = wav_transform(x)

                # append to list
                batch_x += [x]
                batch_h += [h]
                n_samples_list += [h.shape[0] * upsampling_factor]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to ndarray
            batch_x = np.stack(batch_x, axis=0)
            batch_h = pad_list(batch_h)

            # convert to torch variable
            batch_x = torch.LongTensor(batch_x)
            batch_h = torch.FloatTensor(batch_h).transpose(1, 2)
            if torch.cuda.is_available():
                batch_x = batch_x.cuda()
                batch_h = batch_h.cuda()

            yield feat_ids, (batch_x, batch_h, n_samples_list)
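

# A hypothetical call for the generator above (Example #6), decoding features
# converted toward a target speaker; the speaker name, model index, and the
# `model.batch_fast_generate` interface are illustrative assumptions.
def _decode_converted(model, feat_list, spk_trg="VCC2TF1", min_idx=300,
                      upsampling_factor=120):
    generator = decode_generator(feat_list,
                                 batch_size=7,
                                 spk_trg=spk_trg,
                                 min_idx=min_idx,
                                 upsampling_factor=upsampling_factor)
    with torch.no_grad():
        for feat_ids, (batch_x, batch_h, n_samples_list) in generator:
            # assumed model interface for batched autoregressive generation
            samples_list = model.batch_fast_generate(batch_x, batch_h, n_samples_list)
            for feat_id, samples in zip(feat_ids, samples_list):
                logging.info("decoded %s (%d samples)", feat_id, len(samples))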