# Logger instance constructed from LOG_DIR.
logger = Logger(LOG_DIR)

# Extra keyword arguments that are only populated when CUDA is requested
# (presumably forwarded to the data loaders -- confirm at the call sites).
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}

# Distance helper built with argument 2 (presumably the L2 norm, as the
# variable name suggests -- confirm against PairwiseDistance).
l2_dist = PairwiseDistance(2)

# voxceleb = read_my_voxceleb_structure(args.dataroot)
# if args.makemfb:
#pbar = tqdm(voxceleb)
# for datum in voxceleb:
#     # print(datum['filename'])
#     mk_MFB((args.dataroot +'/voxceleb1_wav/' + datum['filename']+'.wav'))
# print("Complete convert")

# Pick the preprocessing pipeline together with the matching file reader.
if args.mfb:
    # Files are read through read_MFB (presumably pre-computed filter-bank
    # features -- confirm).
    transform = transforms.Compose([
        truncatedinputfromMFB(),
        totensor(),
    ])
    # Second pipeline whose truncation step receives
    # args.test_input_per_file.
    transform_T = transforms.Compose([
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ])
    file_loader = read_MFB
else:
    # Files are read through read_audio and passed through toMFB() as part
    # of the pipeline; the tonormal() step is currently disabled.
    # NOTE(review): transform_T is not assigned on this path -- confirm that
    # nothing later reads it when args.mfb is false.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio

# voxceleb_dev = [datum for datum in voxceleb if datum['subset']=='dev']
# ---- Example no. 2 ----
# 0
# Optional conversion pass: call mk_MFB once for every entry of audio_set,
# passing the entry's 'filename' with a '.wav' suffix appended
# (presumably this writes filter-bank features to disk -- confirm in mk_MFB).
if args.makemfb:
    for datum in audio_set:
        mk_MFB(datum['filename'] + '.wav')
    print("Complete convert")

# Pick the preprocessing pipeline together with the matching file reader.
if args.mfb:
    # Files are read through read_MFB. The pipeline uses
    # concateinputfromMFB() + to4tensor(); the earlier
    # truncatedinputfromMFB() + totensor() variant is disabled.
    transform = transforms.Compose([concateinputfromMFB(), to4tensor()])
    # Second pipeline whose truncation step receives
    # args.test_input_per_file.
    transform_T = transforms.Compose([
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ])
    file_loader = read_MFB
else:
    # Files are read through read_audio and passed through toMFB() as part
    # of the pipeline; the tonormal() step is currently disabled.
    # NOTE(review): transform_T is not assigned on this path -- confirm that
    # nothing later reads it when args.mfb is false.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio

# Enrollment dataset built from audio_set, rooted at args.dataroot, using the
# file reader and transform chosen above.
enroll_dir = DeepSpeakerEnrollDataset(
    audio_set=audio_set,
    dir=args.dataroot,
    loader=file_loader,
    transform=transform,
    enroll=args.enroll,
)

# Mapping exposed by the dataset as class_to_idx (presumably class name ->
# integer label -- confirm in DeepSpeakerEnrollDataset).
classes_to_label = enroll_dir.class_to_idx