# Setup fragment: gathers the optimizer hyper-parameters from `args` into `opt_kwargs`,
# selects the pair-scoring module (`nn.CosineSimilarity` when `args.cos_sim` is truthy,
# otherwise `nn.PairwiseDistance(p=2)`), and builds the `transform*` pipelines according
# to `args.acoustic_feature`.
# NOTE(review): the line breaks of this span have been collapsed, so everything after the
# first `#` is currently read as a comment, and the span ends with a dangling
# `if args.log_scale:` whose body is not visible here. Restore the original layout
# before changing any logic.
opt_kwargs = {'lr': args.lr, 'lr_decay': args.lr_decay, 'weight_decay': args.weight_decay, 'dampening': args.dampening, 'momentum': args.momentum} l2_dist = nn.CosineSimilarity(dim=1, eps=1e-12) if args.cos_sim else nn.PairwiseDistance(p=2) if args.acoustic_feature == 'fbank': transform = transforms.Compose([ totensor() ]) transform_T = transforms.Compose([ concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file, remove_vad=args.remove_vad), ]) transform_V = transforms.Compose([ ConcateVarInput(remove_vad=args.remove_vad), # varLengthFeat(remove_vad=args.remove_vad), # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file, # remove_vad=args.remove_vad), ]) else: transform = transforms.Compose([ truncatedinput(), toMFB(), totensor(), # tonormal() ]) file_loader = read_audio if args.log_scale:
# NOTE(review): this span still contains unresolved Git merge-conflict markers
# (`<<<<<<< HEAD:TrainAndTest/test_vox1.py`, `=======`,
# `>>>>>>> Server/Server:TrainAndTest/test_egs.py`), so it cannot be parsed as Python.
# In the 'fix' branch the conflict is between concateinputfromMFB(...) on the HEAD side
# and ConcateVarInput(frame_shift=args.frame_shift, ...) on the Server side; choose one
# side and delete the markers. The opening of the first conflict lies above this span.
# Outside the conflicts, the visible logic raises ValueError when `args.input_length` is
# not one of the handled values, appends mvnormal() to `transform` and `transform_T`
# when `args.mvnorm` is truthy, and sets `file_loader = read_mat` for the 'kaldi'
# feature format.
ConcateOrgInput(remove_vad=args.remove_vad), ]) transform_T = transforms.Compose([ ConcateOrgInput(remove_vad=args.remove_vad), >>>>>>> Server/Server:TrainAndTest/test_egs.py ]) elif args.input_length == 'fix': transform = transforms.Compose([ <<<<<<< HEAD:TrainAndTest/test_vox1.py concateinputfromMFB(remove_vad=args.remove_vad), ]) transform_T = transforms.Compose([ concateinputfromMFB(input_per_file=args.test_input_per_file, remove_vad=args.remove_vad), ======= ConcateVarInput(frame_shift=args.frame_shift, remove_vad=args.remove_vad), ]) transform_T = transforms.Compose([ ConcateVarInput(frame_shift=args.frame_shift, remove_vad=args.remove_vad), >>>>>>> Server/Server:TrainAndTest/test_egs.py ]) else: raise ValueError('input length must be var or fix.') if args.mvnorm: transform.transforms.append(mvnormal()) transform_T.transforms.append(mvnormal()) # pdb.set_trace() if args.feat_format == 'kaldi': file_loader = read_mat
# Optimizer hyper-parameters, copied one-to-one from the parsed arguments.
opt_kwargs = {
    name: getattr(args, name)
    for name in ('lr', 'lr_decay', 'weight_decay', 'dampening', 'momentum')
}

# Pair-scoring module: cosine similarity when args.cos_sim is truthy,
# otherwise the PairwiseDistance helper constructed with argument 2.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

if args.acoustic_feature != 'fbank':
    # Non-fbank features: truncatedinput -> toMFB -> totensor, applied in that order.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])
    # NOTE(review): only this branch assigns file_loader (and it does not define
    # transform_T / transform_V) — confirm the other names are bound elsewhere.
    file_loader = read_audio
else:
    # Pipeline for `transform`: fixed-length concatenation of c.NUM_FRAMES_SPECT
    # frames, then to2tensor().
    transform = transforms.Compose([
        concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT,
                            remove_vad=args.remove_vad),
        to2tensor(),
    ])
    # Pipeline for `transform_T`: a single ConcateVarInput step, no tensor conversion.
    transform_T = transforms.Compose([
        ConcateVarInput(num_frames=c.NUM_FRAMES_SPECT,
                        remove_vad=args.remove_vad),
    ])
    # Pipeline for `transform_V`: varLengthFeat followed by to2tensor().
    transform_V = transforms.Compose([
        varLengthFeat(remove_vad=args.remove_vad),
        to2tensor(),
    ])
# Seed both random number generators from the same user-supplied seed.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if args.cuda:
    # Let cuDNN benchmark and pick its algorithms.
    cudnn.benchmark = True
    # Loader options used on the CUDA path — presumably forwarded to a DataLoader; confirm.
    kwargs = {'num_workers': 12, 'pin_memory': True}
else:
    kwargs = {}

# Pair-scoring module: cosine similarity when args.cos_sim is truthy,
# otherwise the PairwiseDistance helper constructed with argument 2.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

if args.acoustic_feature == 'fbank':
    # fbank features: `transform` and `transform_T` each get their own
    # default-configured ConcateVarInput instance; files are read with read_mat.
    transform = transforms.Compose([ConcateVarInput()])
    transform_T = transforms.Compose([ConcateVarInput()])
    file_loader = read_mat
else:
    # Other features: truncatedinput -> toMFB -> totensor, files read with read_audio.
    # NOTE(review): transform_T is not defined on this branch — confirm it is not
    # needed, or is bound elsewhere, when acoustic_feature is not 'fbank'.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])
    file_loader = read_audio
# Pipeline for `transform`: truncatedinput -> toMFB -> totensor, applied in that
# order by Compose.
transform = transforms.Compose([
    truncatedinput(),
    toMFB(),
    totensor(),
])

# Pipeline for `transform_V`, chosen by how test inputs are presented.
if args.test_input == 'var':
    transform_V = transforms.Compose([
        ConcateOrgInput(remove_vad=args.remove_vad, feat_type=args.feat_format),
    ])
elif args.test_input == 'fix':
    # num_frames and frame_shift are both set to args.chunk_size.
    transform_V = transforms.Compose([
        ConcateVarInput(remove_vad=args.remove_vad,
                        num_frames=args.chunk_size,
                        frame_shift=args.chunk_size,
                        feat_type=args.feat_format),
    ])
else:
    # Fail fast with a clear message. Without this branch, transform_V is not
    # bound by this block and the first use below would fail with a NameError
    # (unless it happens to be bound earlier, outside this view).
    raise ValueError(
        'test input must be var or fix, got {!r}.'.format(args.test_input))

# Optional tolog() step, appended to the end of both pipelines.
if args.log_scale:
    transform.transforms.append(tolog())
    transform_V.transforms.append(tolog())

# Select the function used to read a feature file from disk.
if args.feat_format == 'kaldi':
    file_loader = read_mat
elif args.feat_format == 'npy':
    file_loader = np.load
elif args.feat_format == 'wav':
    # NOTE(review): 'wav' is mapped to load_mat here — confirm this is the intended
    # reader for audio files and not a mix-up with another loader.
    file_loader = load_mat