train_data = SpectrogramDataset( vocab, args, audio_conf, manifest_filepath_list=args.train_manifest_list, normalize=True, augment=args.augment, input_type=args.input_type, is_train=True, partitions=args.train_partition_list) elif args.feat == "logfbank": train_data = LogFBankDataset( vocab, args, audio_conf, manifest_filepath_list=args.train_manifest_list, normalize=True, augment=args.augment, input_type=args.input_type, is_train=True) train_data_list.append(train_data) valid_loader_list, test_loader_list = [], [] for i in range(len(args.valid_manifest_list)): if args.feat == "spectrogram": valid_data = SpectrogramDataset( vocab, args, audio_conf, manifest_filepath_list=[args.valid_manifest_list[i]], normalize=True,
# Evaluation driver: builds the test dataset and loader, then runs `evaluate`.
# `model`, `vocab`, `args` and `loaded_args` come from outside this block.

# The audio front-end settings are read from `loaded_args`, not from `args`.
# NOTE(review): presumably `loaded_args` holds the arguments stored alongside
# the loaded model -- confirm against the code that defines it.
audio_conf = dict(
    sample_rate=loaded_args.sample_rate,
    window_size=loaded_args.window_size,
    window_stride=loaded_args.window_stride,
    window=loaded_args.window,
    noise_dir=loaded_args.noise_dir,
    noise_prob=loaded_args.noise_prob,
    noise_levels=(loaded_args.noise_min, loaded_args.noise_max),
)

test_manifest_list = args.test_manifest_list

print("INPUT TYPE: ", args.input_type)

# Only the first entry of the test manifest list is evaluated; augmentation
# is switched off for testing.
if loaded_args.feat == "spectrogram":
    test_data = SpectrogramDataset(
        vocab,
        args,
        audio_conf=audio_conf,
        manifest_filepath_list=[test_manifest_list[0]],
        normalize=True,
        augment=False,
        input_type=args.input_type,
    )
elif loaded_args.feat == "logfbank":
    test_data = LogFBankDataset(
        vocab,
        args,
        audio_conf=audio_conf,
        manifest_filepath_list=[test_manifest_list[0]],
        normalize=True,
        augment=False,
        input_type=args.input_type,
    )
else:
    # Without this branch `test_data` would stay unbound and the script would
    # fail later with a NameError that does not mention the offending setting.
    raise ValueError(
        "Unsupported feature type {!r}; expected 'spectrogram' or 'logfbank'".format(
            loaded_args.feat
        )
    )

# `args.k_test` is used as the batch size of the test sampler.
test_sampler = BucketingSampler(test_data, batch_size=args.k_test)
test_loader = AudioDataLoader(
    vocab.PAD_ID,
    dataset=test_data,
    num_workers=args.num_workers,
    batch_sampler=test_sampler,
)

# Query the parameter counts once instead of once per printed field.
num_params = compute_num_params(model)
print("Parameters: {}(trainable), {}(non-trainable)".format(num_params[0], num_params[1]))

if not args.cuda:
    model = model.cpu()

# The language model is only constructed when rescoring is requested;
# otherwise `evaluate` receives lm=None.
lm = None
if args.lm_rescoring:
    lm = LM(args.lm_path, args)

print(">>>>>>>>>", args.tgt_max_len)

evaluate(model, vocab, test_loader, args, lm=lm, start_token=vocab.SOS_ID)