# Ensure the checkpoint directory exists before any model saving.
if not os.path.exists(args.check_path): os.makedirs(args.check_path)

# Optimizer hyper-parameters forwarded to the optimizer factory.
opt_kwargs = {
    'lr': args.lr,
    'lr_decay': args.lr_decay,
    'weight_decay': args.weight_decay,
    'dampening': args.dampening,
    'momentum': args.momentum
}

# Verification scoring function: cosine similarity when --cos-sim is set,
# otherwise Euclidean (p=2) pairwise distance between embeddings.
l2_dist = nn.CosineSimilarity(
    dim=1, eps=1e-12) if args.cos_sim else nn.PairwiseDistance(p=2)

if args.acoustic_feature == 'fbank':
    # Training transform: features straight to tensor.
    transform = transforms.Compose([totensor()])
    # Test transform: cut fixed-length chunks from each utterance
    # (input_per_file chunks per file), optionally dropping VAD-removed frames.
    transform_T = transforms.Compose([
        concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT,
                            input_per_file=args.test_input_per_file,
                            remove_vad=args.remove_vad),
    ])
    # Validation transform: keep each utterance at its natural length.
    transform_V = transforms.Compose([
        varLengthFeat(remove_vad=args.remove_vad),
    ])
else:
    # Raw-audio pipeline: truncate, convert to filter-banks, then to tensor.
    # NOTE(review): this Compose list is cut off in this fragment — its
    # closing bracket lies outside the visible source.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
        # tonormal()
# NOTE(review): fragment starts mid-conditional — the matching `if` branch
# (presumably selecting a cosine-similarity scorer) is not visible here.
else:
    # Euclidean (p=2) distance between embedding pairs.
    l2_dist = PairwiseDistance(2)

# Read the VoxCeleb utterance lists (full set and dev split).
voxceleb, voxceleb_dev = wav_list_reader(args.dataroot)

if args.makemfb:
    # One-off preprocessing: convert every wav to filter-bank features on disk.
    # pbar = tqdm(voxceleb)
    for datum in voxceleb:
        mk_MFB(
            (args.dataroot + '/voxceleb1_wav/' + datum['filename'] + '.wav'))
    print("Complete convert")

if args.mfb:
    # Pre-computed filter-bank features loaded from disk.
    transform = transforms.Compose([
        concateinputfromMFB(),
        # truncatedinputfromMFB(),
        totensor()
    ])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file),
        # truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor()
    ])
    file_loader = read_MFB
else:
    # Raw-audio pipeline: truncate, compute filter-banks, then to tensor.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
        # tonormal()
    ])
    file_loader = read_audio
# Metrics logger writing under LOG_DIR.
logger = Logger(LOG_DIR)

# DataLoader options: pinned memory only when CUDA is available.
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}

# Verification scoring: Euclidean (p=2) distance between embeddings.
l2_dist = PairwiseDistance(2)

# (A one-off wav -> filter-bank conversion pass over the VoxCeleb list
# previously lived here; it is kept disabled.)

if args.mfb:
    # Pre-computed filter-bank features read from disk.
    file_loader = read_MFB
    transform = transforms.Compose([truncatedinputfromMFB(), totensor()])
    transform_T = transforms.Compose([
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ])
else:
    # Raw audio: truncate, compute filter-banks, then to tensor.
    file_loader = read_audio
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])
# (The dev-subset filter over the voxceleb list is kept disabled.)
# (Disabled: one-off spectrogram generation over the VoxCeleb wav list,
# writing .npy files under args.dataroot/spectrogram/.)

if args.acoustic_feature == 'fbank':
    # Filter-bank features loaded from pre-computed files.
    file_loader = read_MFB
    transform = transforms.Compose([concateinputfromMFB(), totensor()])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ])
else:
    # Raw audio: truncate, convert to filter-banks, then to tensor.
    file_loader = read_audio
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])
# concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, remove_vad=True), varLengthFeat(remove_vad=args.remove_vad), to2tensor(), tonormal() ]) transform_T = transforms.Compose([ # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file, remove_vad=True), varLengthFeat(remove_vad=args.remove_vad), to2tensor(), tonormal() ]) else: transform = transforms.Compose( [truncatedinput(), toMFB(), totensor(), tonormal()]) file_loader = read_audio # pdb.set_trace() file_loader = read_mat train_dir = ScriptTrainDataset(dir=args.train_dir, samples_per_speaker=args.input_per_spks, loader=file_loader, transform=transform, num_valid=args.num_valid) test_dir = ScriptTestDataset(dir=args.test_dir, loader=file_loader, transform=transform_T) if len(test_dir) < args.veri_pairs: args.veri_pairs = len(test_dir)
# Seed NumPy and Torch RNGs for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    # Let cuDNN autotune kernels for the (fixed-size) inputs.
    cudnn.benchmark = True

# create logger
# Define visulaize SummaryWriter instance
# DataLoader options: worker processes + pinned memory only under CUDA.
if args.cuda:
    kwargs = {'num_workers': 12, 'pin_memory': True}
else:
    kwargs = {}

# Embedding comparison: cosine similarity or Euclidean (p=2) distance.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

if args.acoustic_feature == 'fbank':
    # Variable-length filter-bank features read from Kaldi .mat files.
    file_loader = read_mat
    transform = transforms.Compose([varLengthFeat(), totensor()])
    transform_T = transforms.Compose([varLengthFeat(), totensor()])
else:
    # Raw audio: truncate, compute filter-banks, then to tensor.
    file_loader = read_audio
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])

# Dataset used to extract embeddings for the training split.
train_dir = KaldiExtractDataset(dir=args.train_dir,
                                loader=file_loader,
                                transform=transform)
# NOTE(review): fragment opens mid dict literal — the opening brace and the
# 'lr' entry of the optimizer-kwargs dict lie outside the visible source.
    'lr_decay': args.lr_decay,
    'weight_decay': args.weight_decay,
    'dampening': args.dampening,
    'momentum': args.momentum
}

# Trial scoring: cosine similarity when --cos-sim is set, else Euclidean
# (p=2) pairwise distance.
l2_dist = nn.CosineSimilarity(
    dim=1, eps=1e-12) if args.cos_sim else nn.PairwiseDistance(p=2)

if args.acoustic_feature == 'fbank':
    # Training transform: sample input_per_spks fixed-size chunks
    # (chunk_size frames) per file, optionally dropping VAD-removed frames.
    transform = transforms.Compose([
        ConcateNumInput(input_per_file=args.input_per_spks,
                        num_frames=args.chunk_size,
                        feat_type=args.feat_format,
                        remove_vad=args.remove_vad),
        totensor()
    ])
    if args.test_input == 'var':
        # Evaluate on the whole variable-length utterance.
        transform_V = transforms.Compose([
            ConcateOrgInput(remove_vad=args.remove_vad,
                            feat_type=args.feat_format),
        ])
    elif args.test_input == 'fix':
        # Evaluate on fixed-size chunks; frame_shift == chunk_size, so the
        # chunks do not overlap.
        transform_V = transforms.Compose([
            ConcateVarInput(remove_vad=args.remove_vad,
                            num_frames=args.chunk_size,
                            frame_shift=args.chunk_size,
                            feat_type=args.feat_format),
        ])