def main():
    """Entry point: load a trained identification model from its checkpoint
    and compute/save speaker embeddings for the test split.

    All settings are hard-coded below; edit them here to change the run.
    """
    # --- Settings ---
    use_cuda = True
    log_dir = 'identification_model_ckpt'
    cp_num = 50            # Which checkpoint to use?
    test_frames = 200

    # --- Model params ---
    resnet_version = 'resnet18'
    embedding_size = 128
    n_classes = 200

    # Restore the model weights from the chosen checkpoint.
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes,
                       resnet_version)

    # Dataframe describing the enrollment feature files on disk.
    features_path = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/train_test_split/test'
    DB_all = read_feats_structure(features_path)

    # Directory where the computed embeddings will be written.
    embedding_dir = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/embd_identification'

    # Run enrollment and persist one embedding per speaker.
    enroll_per_spk(use_cuda, test_frames, model, DB_all, embedding_dir)
def load_dataset(M):
    """Build the contrastive training dataset over the training feature dir.

    Args:
        M: number of utterances drawn per speaker (forwarded to
           ``contrastiveDataset``).

    Returns:
        (train_dataset, n_classes) where ``n_classes`` is the number of
        distinct speakers found in the training DB.
    """
    train_DB = read_feats_structure(c.TRAIN_FEAT_DIR)
    print(f'\nTraining set {len(train_DB)}')

    # Loader yields a numpy array of shape (n_frames, n_dims).
    file_loader = read_MFB
    transform = transforms.Compose([
        TruncatedInputfromMFB(),  # numpy array:(1, n_frames, n_dims)
        ToTensorInput()           # torch tensor:(1, n_dims, n_frames)
    ])
    transform_T = ToTensorDevInput()

    # Stable speaker ordering -> deterministic speaker-id -> index mapping.
    speaker_list = sorted(set(train_DB['speaker_id']))  # len(speaker_list) == n_speakers
    spk_to_idx = {spk: idx for idx, spk in enumerate(speaker_list)}

    train_dataset = contrastiveDataset(DB=train_DB,
                                       loader=file_loader,
                                       transform=transform,
                                       spk_to_idx=spk_to_idx,
                                       spk_list=speaker_list,
                                       M=M)

    n_classes = len(speaker_list)  # How many speakers? 240
    print('\nNumber of classes (speakers):\n{}\n'.format(n_classes))

    return train_dataset, n_classes
def split_enroll_and_test(dataroot_dir):
    """Split the feature DB under *dataroot_dir* into enrollment and test sets.

    Files whose names contain the literal substring 'enroll.p' go to the
    enrollment set; those containing 'test.p' go to the test set.

    Args:
        dataroot_dir: directory scanned by ``read_feats_structure``.

    Returns:
        (enroll_DB, test_DB): two DataFrames with freshly reset indices.
    """
    DB_all = read_feats_structure(dataroot_dir)

    # regex=False: match the literal substrings. The original used the
    # default regex mode, where '.' is a wildcard and e.g. 'enrollXp'
    # would have matched too.
    enroll_DB = DB_all[DB_all['filename'].str.contains('enroll.p', regex=False)]
    test_DB = DB_all[DB_all['filename'].str.contains('test.p', regex=False)]

    # Reset the index so each split is 0..n-1 indexed.
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)

    return enroll_DB, test_DB
def split_train_dev(train_feat_dir, valid_ratio, random_state=None):
    """Randomly split the training feature DB into train and validation sets.

    Args:
        train_feat_dir: directory scanned by ``read_feats_structure``.
        valid_ratio: percentage (0-100) of utterances for the validation set.
        random_state: optional seed forwarded to ``DataFrame.sample`` so the
            shuffle is reproducible. Default ``None`` preserves the original
            (unseeded) behavior.

    Returns:
        (train_DB, valid_DB): two DataFrames with freshly reset indices.
    """
    train_valid_DB = read_feats_structure(train_feat_dir)
    total_len = len(train_valid_DB)  # 148642
    valid_len = int(total_len * valid_ratio / 100.)
    train_len = total_len - valid_len

    # Shuffle the whole DB before slicing so the split is random.
    shuffled_train_valid_DB = (
        train_valid_DB.sample(frac=1, random_state=random_state)
        .reset_index(drop=True)
    )

    # Split the DB into train and valid set.
    train_DB = shuffled_train_valid_DB.iloc[:train_len].reset_index(drop=True)
    valid_DB = shuffled_train_valid_DB.iloc[train_len:].reset_index(drop=True)

    # Guard against an empty DB: the original crashed with ZeroDivisionError
    # when computing the percentages below.
    train_pct = (train_len / total_len) * 100 if total_len else 0.0
    valid_pct = (valid_len / total_len) * 100 if total_len else 0.0
    print('\nTraining set %d utts (%0.1f%%)' % (train_len, train_pct))
    print('Validation set %d utts (%0.1f%%)' % (valid_len, valid_pct))
    print('Total %d utts' % (total_len))

    return train_DB, valid_DB