# NOTE(review): this fragment begins mid-statement -- the opening of the
# cosine-similarity call (presumably `l2_dist = nn.CosineSimilarity(`) is
# outside the visible source.
dim=1, eps=1e-6) if args.cos_sim else PairwiseDistance(2)

# Build train (transform) and test (transform_T) feature pipelines,
# depending on whether inputs are variable-length or fixed-length.
if args.input_length == 'var':
    transform = transforms.Compose([
        # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, remove_vad=False),
        varLengthFeat(remove_vad=args.remove_vad),
        to2tensor()
    ])
    transform_T = transforms.Compose([
        # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file, remove_vad=False),
        varLengthFeat(remove_vad=args.remove_vad),
        to2tensor()
    ])
elif args.input_length == 'fix':
    transform = transforms.Compose(
        [concateinputfromMFB(remove_vad=args.remove_vad), to2tensor()])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file,
                            remove_vad=args.remove_vad),
        to2tensor()
    ])

# Optionally append mean/variance normalization to both pipelines.
if args.mvnorm:
    transform.transforms.append(mvnormal())
    transform_T.transforms.append(mvnormal())

file_loader = read_mat
# NOTE(review): this call is cut off in the visible source -- the remaining
# keyword arguments of ScriptTrainDataset are not shown.
train_dir = ScriptTrainDataset(dir=args.train_dir, samples_per_speaker=args.input_per_spks,
# DataLoader keyword args: spawn args.nj worker processes only when CUDA
# is available; otherwise load in the main process.
kwargs = {'num_workers': args.nj, 'pin_memory': False} if args.cuda else {}

# Make sure the checkpoint directory exists before training starts.
if not os.path.exists(args.check_path):
    os.makedirs(args.check_path)

# Optimizer hyper-parameters collected from the command line.
opt_kwargs = {'lr': args.lr,
              'lr_decay': args.lr_decay,
              'weight_decay': args.weight_decay,
              'dampening': args.dampening,
              'momentum': args.momentum}

# Scoring back-end: cosine similarity when --cos-sim is set, otherwise
# Euclidean (L2) pairwise distance.
l2_dist = nn.CosineSimilarity(dim=1, eps=1e-12) if args.cos_sim else nn.PairwiseDistance(p=2)

# Feature pipelines: transform (train), transform_T (test),
# transform_V (presumably validation -- confirm against the callers).
if args.acoustic_feature == 'fbank':
    transform = transforms.Compose([
        totensor()
    ])
    transform_T = transforms.Compose([
        concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file,
                            remove_vad=args.remove_vad),
    ])
    transform_V = transforms.Compose([
        ConcateVarInput(remove_vad=args.remove_vad),
        # varLengthFeat(remove_vad=args.remove_vad),
        # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file,
        #                     remove_vad=args.remove_vad),
    ])
else:
    # Non-fbank path: work from raw audio instead of precomputed features.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
        # tonormal()
    ])
# Scoring back-end: cosine similarity or Euclidean (L2) pairwise distance.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

# Load the VoxCeleb utterance lists (full set and dev split).
voxceleb, voxceleb_dev = wav_list_reader(args.dataroot)

# Optionally precompute Mel filter-bank features for every wav file.
if args.makemfb:
    # pbar = tqdm(voxceleb)
    for datum in voxceleb:
        mk_MFB(
            (args.dataroot + '/voxceleb1_wav/' + datum['filename'] + '.wav'))
    print("Complete convert")

if args.mfb:
    # Precomputed filter-bank path: read features with read_MFB.
    transform = transforms.Compose([
        concateinputfromMFB(),
        # truncatedinputfromMFB(),
        totensor()
    ])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file),
        # truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor()
    ])
    file_loader = read_MFB
else:
    # NOTE(review): this branch is cut off in the visible source -- the
    # Compose list is left unclosed here.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
        # tonormal()
# print("Complete convert") # # if args.makespec: # num_pro = 1. # for datum in voxceleb: # # Data/voxceleb1/ # # /data/voxceleb/voxceleb1_wav/ # GenerateSpect(wav_path='/data/voxceleb/voxceleb1_wav/' + datum['filename']+'.wav', # write_path=args.dataroot +'/spectrogram/voxceleb1_wav/' + datum['filename']+'.npy') # print('\rprocessed {:2f}% {}/{}.'.format(num_pro/len(voxceleb), num_pro, len(voxceleb)), end='\r') # num_pro += 1 # print('\nComputing Spectrograms success!') # exit(1) if args.acoustic_feature == 'fbank': transform = transforms.Compose([concateinputfromMFB(), totensor()]) transform_T = transforms.Compose([ concateinputfromMFB(input_per_file=args.test_input_per_file), totensor() ]) file_loader = read_MFB else: transform = transforms.Compose([ truncatedinput(), toMFB(), totensor(), # tonormal() ]) file_loader = read_audio
# Create the checkpoint directory if it does not already exist.
if not os.path.exists(args.check_path):
    os.makedirs(args.check_path)

# Optimizer hyper-parameters collected from the command line.
opt_kwargs = {
    'lr': args.lr,
    'lr_decay': args.lr_decay,
    'weight_decay': args.weight_decay,
    'dampening': args.dampening,
    'momentum': args.momentum
}

# Scoring back-end: cosine similarity or Euclidean (L2) pairwise distance.
l2_dist = nn.CosineSimilarity(
    dim=1, eps=1e-6) if args.cos_sim else PairwiseDistance(2)

if args.mfb:
    # 300-frame MFB inputs with VAD-marked frames removed (hard-coded here,
    # unlike sibling scripts that read the length from args/config).
    transform = transforms.Compose([
        concateinputfromMFB(num_frames=300, remove_vad=True),
        # varLengthFeat(max_chunk_size=300),
        to2tensor()
    ])
    transform_T = transforms.Compose([
        concateinputfromMFB(num_frames=300, input_per_file=args.test_input_per_file,
                            remove_vad=True),
        # varLengthFeat(),
        to2tensor()
    ])
else:
    # NOTE(review): this branch is cut off in the visible source -- the
    # Compose list is left unclosed here.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
# Optimizer hyper-parameters collected from the command line.
opt_kwargs = {
    'lr': args.lr,
    'lr_decay': args.lr_decay,
    'weight_decay': args.weight_decay,
    'dampening': args.dampening,
    'momentum': args.momentum
}

# Scoring back-end: cosine similarity or Euclidean (L2) pairwise distance.
l2_dist = nn.CosineSimilarity(
    dim=1, eps=1e-6) if args.cos_sim else PairwiseDistance(2)

if args.mfb:
    # Kaldi-matrix features (read_mat) with VAD removal driven by args.
    transform = transforms.Compose([
        concateinputfromMFB(
            remove_vad=args.remove_vad
        ),  # num_frames=np.random.randint(low=300, high=500)),
        to2tensor()
    ])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file,
                            remove_vad=args.remove_vad),
        to2tensor()
    ])
    file_loader = read_mat
else:
    # NOTE(review): this branch is cut off in the visible source -- the
    # Compose list is left unclosed here. Also note it hard-codes
    # remove_vad=True while the branch above uses args.remove_vad.
    transform = transforms.Compose([
        concateinputfromMFB(
            remove_vad=True
        ),  # num_frames=np.random.randint(low=300, high=500)),
# Device selection and reproducibility: seed every RNG the pipeline uses.
args.cuda = not args.no_cuda and torch.cuda.is_available()
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    # Let cuDNN auto-tune convolution algorithms for fixed input sizes.
    cudnn.benchmark = True

# Define visualize SummaryWriter instance
kwargs = {'num_workers': 12, 'pin_memory': True} if args.cuda else {}

# Scoring back-end: cosine similarity or Euclidean (L2) pairwise distance.
l2_dist = nn.CosineSimilarity(
    dim=1, eps=1e-6) if args.cos_sim else PairwiseDistance(2)

# Train/test feature pipelines built around c.MINIMUIN_LENGTH frames
# (constant name spelled as in the config module).
transform = transforms.Compose([
    concateinputfromMFB(num_frames=c.MINIMUIN_LENGTH, remove_vad=True),
    # varLengthFeat(),
    to2tensor()
])
transform_T = transforms.Compose([
    concateinputfromMFB(num_frames=c.MINIMUIN_LENGTH, input_per_file=args.test_input_per_file,
                        remove_vad=True),
    # varLengthFeat(),
    to2tensor()
])

# NOTE(review): this call is cut off in the visible source -- the remaining
# keyword arguments of ScriptTrainDataset are not shown.
train_dir = ScriptTrainDataset(dir=args.train_dir, samples_per_speaker=args.input_per_spks,
                               transform=transform, return_uid=True,
# DataLoader keyword args: 12 workers with pinned memory when CUDA is on.
kwargs = {'num_workers': 12, 'pin_memory': True} if args.cuda else {}

# Make sure the checkpoint directory exists before training starts.
if not os.path.exists(args.check_path):
    os.makedirs(args.check_path)

# Optimizer hyper-parameters collected from the command line.
opt_kwargs = {'lr': args.lr,
              'lr_decay': args.lr_decay,
              'weight_decay': args.weight_decay,
              'dampening': args.dampening,
              'momentum': args.momentum}

# Scoring back-end: cosine similarity or Euclidean (L2) pairwise distance.
l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6) if args.cos_sim else PairwiseDistance(2)

if args.mfb:
    # Precomputed filter-bank features with VAD frames removed.
    transform = transforms.Compose([
        concateinputfromMFB(remove_vad=True),  # num_frames=np.random.randint(low=300, high=500)),
        # varLengthFeat(),
        to2tensor()
    ])
else:
    # Raw-audio fallback: truncate, compute MFB on the fly, tensorize.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        to2tensor(),
        # tonormal()
    ])

# NOTE(review): file_loader is set to read_mat regardless of the branch
# above -- confirm this is intentional for the non-mfb path.
file_loader = read_mat

# Build the train and test datasets from the args-specified directories;
# num_valid utterances appear to be reserved for validation -- confirm
# against the ScriptTrainDataset implementation.
train_dir = ScriptTrainDataset(dir=args.train_dir, samples_per_speaker=args.input_per_spks,
                               transform=transform, loader=file_loader, num_valid=args.num_valid)
test_dir = ScriptTestDataset(dir=args.test_dir, transform=transform, loader=file_loader)
# Euclidean (L2) pairwise distance for scoring (no cosine option here).
l2_dist = PairwiseDistance(2)

# NOTE(review): the empty-list assignment is immediately overwritten by the
# if_load_npy() result below -- it appears redundant.
audio_set = []
audio_set = if_load_npy(dataroot, data_set_list)

# Optionally precompute Mel filter-bank features for every wav file.
if args.makemfb:
    #pbar = tqdm(voxceleb)
    for datum in audio_set:
        # print(datum['filename'])
        mk_MFB((datum['filename']+'.wav'))
    print("Complete convert")

if args.mfb:
    # Precomputed filter-bank path, read with read_MFB.
    transform = transforms.Compose([
        concateinputfromMFB(),
        to4tensor()
        # truncatedinputfromMFB(),
        # totensor()
    ])
    transform_T = transforms.Compose([
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor()
    ])
    file_loader = read_MFB
else:
    # NOTE(review): this branch is cut off in the visible source -- the
    # Compose list is left unclosed here.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
        #tonormal()
# DataLoader keyword args: args.nj workers, no pinned memory, CUDA only.
kwargs = {'num_workers': args.nj, 'pin_memory': False} if args.cuda else {}

# Make sure the checkpoint directory exists before training starts.
if not os.path.exists(args.check_path):
    os.makedirs(args.check_path)

# Optimizer hyper-parameters collected from the command line.
opt_kwargs = {'lr': args.lr,
              'lr_decay': args.lr_decay,
              'weight_decay': args.weight_decay,
              'dampening': args.dampening,
              'momentum': args.momentum}

# Scoring back-end: cosine similarity or Euclidean (L2) pairwise distance.
l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6) if args.cos_sim else PairwiseDistance(2)

# Feature pipelines: transform (train), transform_T (test),
# transform_V (presumably validation -- confirm against the callers).
if args.acoustic_feature == 'fbank':
    transform = transforms.Compose([
        concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, remove_vad=args.remove_vad),
        # varLengthFeat(),
        to2tensor()
    ])
    transform_T = transforms.Compose([
        ConcateVarInput(num_frames=c.NUM_FRAMES_SPECT, remove_vad=args.remove_vad),
        # to2tensor()
    ])
    transform_V = transforms.Compose([
        varLengthFeat(remove_vad=args.remove_vad),
        to2tensor()
    ])
else:
    # NOTE(review): this branch is cut off in the visible source -- the
    # Compose list is left unclosed here.
    transform = transforms.Compose([
        truncatedinput(),