Example #1
0
            # NOTE(review): fragment — the opening `if args.feat == "spectrogram":`
            # header lies above this excerpt; this branch builds spectrogram features.
            train_data = SpectrogramDataset(
                vocab,
                args,
                audio_conf,
                manifest_filepath_list=args.train_manifest_list,  # all training manifests merged into one dataset
                normalize=True,
                augment=args.augment,
                input_type=args.input_type,
                is_train=True,
                partitions=args.train_partition_list)
        elif args.feat == "logfbank":
            # Log filterbank features instead of spectrograms. Note this branch
            # does not pass `partitions` — presumably LogFBankDataset does not
            # support them (TODO confirm against its signature).
            train_data = LogFBankDataset(
                vocab,
                args,
                audio_conf,
                manifest_filepath_list=args.train_manifest_list,
                normalize=True,
                augment=args.augment,
                input_type=args.input_type,
                is_train=True)
        train_data_list.append(train_data)

    # Validation builds ONE loader per manifest (per-dataset evaluation),
    # unlike training above where all manifests feed a single dataset.
    valid_loader_list, test_loader_list = [], []
    for i in range(len(args.valid_manifest_list)):
        if args.feat == "spectrogram":
            # Constructor call continues beyond this excerpt (cut mid-argument-list).
            valid_data = SpectrogramDataset(
                vocab,
                args,
                audio_conf,
                manifest_filepath_list=[args.valid_manifest_list[i]],
                normalize=True,
Example #2
0
    # NOTE(review): fragment of a larger evaluation routine — `loaded_args`,
    # `vocab`, `model`, and `args` are defined above this excerpt.
    # The audio front-end config is taken from the checkpoint-time arguments
    # (`loaded_args`), so feature extraction matches how the model was trained.
    audio_conf = dict(sample_rate=loaded_args.sample_rate,
                      window_size=loaded_args.window_size,
                      window_stride=loaded_args.window_stride,
                      window=loaded_args.window,
                      noise_dir=loaded_args.noise_dir,
                      noise_prob=loaded_args.noise_prob,
                      noise_levels=(loaded_args.noise_min, loaded_args.noise_max))

    test_manifest_list = args.test_manifest_list

    print("INPUT TYPE: ", args.input_type)
    # Only the FIRST test manifest is evaluated; any further entries in
    # test_manifest_list are silently ignored here.
    if loaded_args.feat == "spectrogram":
        test_data = SpectrogramDataset(vocab, args, audio_conf=audio_conf, manifest_filepath_list=[test_manifest_list[0]], normalize=True, augment=False, input_type=args.input_type)
    elif loaded_args.feat == "logfbank":
        test_data = LogFBankDataset(vocab, args, audio_conf=audio_conf, manifest_filepath_list=[test_manifest_list[0]], normalize=True, augment=False, input_type=args.input_type)
    # NOTE(review): no else branch — an unrecognized loaded_args.feat leaves
    # `test_data` unbound and the next line raises NameError; consider an
    # explicit `raise ValueError` for unknown feature types.
    test_sampler = BucketingSampler(test_data, batch_size=args.k_test)
    test_loader = AudioDataLoader(vocab.PAD_ID, dataset=test_data, num_workers=args.num_workers, batch_sampler=test_sampler)

    print("Parameters: {}(trainable), {}(non-trainable)".format(compute_num_params(model)[0], compute_num_params(model)[1]))

    # Move the model to CPU when CUDA is disabled (model presumably loaded on
    # GPU by code above this excerpt — TODO confirm).
    if not args.cuda:
        model = model.cpu()

    # Optional external language model used for rescoring decoder hypotheses.
    lm = None
    if args.lm_rescoring:
        lm = LM(args.lm_path, args)

    print(">>>>>>>>>", args.tgt_max_len)
    evaluate(model, vocab, test_loader, args, lm=lm, start_token=vocab.SOS_ID)