Exemplo n.º 1
0
    # Neither a wav list nor a feature list was supplied: fall back to the
    # feature scp path stored under data_cfg['feat']['all'].
    # NOTE(review): data_cfg is indexed here without the
    # `args.data_cfg is not None` check used for 'tf' mode below -- confirm it
    # is always loaded before this point.
    if args.wav_scp is None and args.feat_scp is None:
        logger.info('fallback ... read feat_scp from data_cfg')
        args.feat_scp = data_cfg['feat']['all']

    # 'tf' mode additionally needs the text entries referenced by the data
    # config, wrapped in a TextIterator.
    # NOTE(review): 'tf' presumably stands for teacher forcing -- confirm.
    if args.mode == 'tf':
        assert args.data_cfg is not None
        text_iterator = TextIterator(path=data_cfg['text']['all'],
                                     map_text2idx=map_text2idx)

    if args.wav_scp is not None:
        # list all wav files #
        # Read the scp through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(args.wav_scp) as wav_scp_file:
            list_key_wav = regex_key_val.findall(wav_scp_file.read())
        if args.set is not None:
            if os.path.exists(args.set):
                # --set names an existing file: restrict the entries using
                # the keys read from that file
                list_key_wav = DataLoader._subset_data(
                    list_key_wav, DataLoader._read_key(args.set))
            else:
                # otherwise --set itself is a space-separated list of keys
                args.set = args.set.split(' ')
                list_key_wav = DataLoader._subset_data(list_key_wav, args.set)
        # order the entries by their first field (the key)
        list_key_wav = sorted(list_key_wav, key=lambda x: x[0])

        # lazy load -- saving memory #
        def lazy_generate_feat(path, cfg):
            """Build the features for ``path`` via ``generate_feat_opts``.

            The result is passed through ``scaler.transform`` when a scaler is
            available in the enclosing scope; otherwise it is returned as is.
            """
            raw_feat = generate_feat_opts(path=path, cfg=cfg)
            if scaler is None:
                return raw_feat
            return scaler.transform(raw_feat)

        # Accumulators created empty here.
        # NOTE(review): presumably filled further down, outside this view --
        # confirm against the rest of the function.
        list_feat = []
        list_feat_len = []
        # first field of every entry in list_key_wav, in the sorted order
        list_key = [x[0] for x in list_key_wav]
Exemplo n.º 2
0
import argparse

from utilbox.regex_util import regex_key_val

from euterpe.common.loader import DataLoader


def parse():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace with two string attributes, ``kv`` and ``set``;
        each is ``None`` when the corresponding flag is not supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--kv', type=str)
    parser.add_argument('--set', type=str)
    return parser.parse_args()


if __name__ == '__main__':
    # Load the key/value entries from --kv, reduce them with the keys read
    # from --set via DataLoader._subset_data, and print each remaining entry
    # as "key value" on its own line.
    cli_args = parse()
    all_pairs = DataLoader._read_key_val(cli_args.kv)
    wanted_keys = DataLoader._read_key(cli_args.set)
    for key, val in DataLoader._subset_data(all_pairs, wanted_keys):
        print('{} {}'.format(key, val))
Exemplo n.º 3
0
if __name__ == '__main__':
    args = parse()

    # the model config is looked up next to the weights: same stem, '.cfg'
    args.config = os.path.splitext(args.model)[0] + '.cfg'
    # Open the data config through a context manager so the handle is closed
    # as soon as parsing is done.
    # NOTE(review): yaml.load is called without an explicit Loader. Recent
    # PyYAML releases require one, and the full loader can construct
    # arbitrary objects -- consider yaml.safe_load if the config is plain data.
    with open(args.data_cfg) as data_cfg_file:
        data_cfg = yaml.load(data_cfg_file)

    # construct all subset feat & feat_len
    if os.path.exists(args.key):
        # --key names an existing file: read the keys from it
        _key = DataLoader._read_key(args.key)
    else:
        # otherwise --key itself is a whitespace-separated list of keys
        _key = args.key.split()
    _feat_all = DataLoader._read_key_val(data_cfg['feat']['all'])
    # the length file sits beside the feature scp: '<stem>_len<ext>'
    _feat_len_all_path = '{}_len{}'.format(
        *os.path.splitext(data_cfg['feat']['all']))
    _feat_len_all = DataLoader._read_key_val(_feat_len_all_path)
    _feat_kv = DataLoader._subset_data(_feat_all, _key)
    _feat_len_kv = DataLoader._subset_data(_feat_len_all, _key)

    # rebuild the model from its config, then restore the saved weights
    model = ModelSerializer.load_config(args.config)
    model.load_state_dict(torch.load(args.model))
    model.eval()  # set as eval mode

    # a non-negative --gpu selects that CUDA device, seeds it and moves the
    # model there; a negative value skips all CUDA setup
    if args.gpu >= 0:
        torch.cuda.set_device(args.gpu)
        torch.cuda.manual_seed(123)
        model.cuda(args.gpu)

    # get feature and information
    data_info = pd.read_csv(data_cfg['misc']['info'], sep=',')
    map_key2spk = dict(
        (str(k), str(v))