# Assumed imports; project-local helpers (read_feats_structure, read_MFB,
# load_model, enroll_per_spk, contrastiveDataset, the transform classes and
# the config module `c`) come from the surrounding repo.
import pandas as pd
from torchvision import transforms


def main():

    # Settings
    use_cuda = True
    log_dir = 'identification_model_ckpt'
    cp_num = 50  # checkpoint number to load
    test_frames = 200

    # Model params
    resnet_version = 'resnet18'
    embedding_size = 128
    n_classes = 200

    # Load model from checkpoint
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes,
                       resnet_version)

    # Build the dataframe for the enrollment DB
    features_path = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/train_test_split/test'

    DB_all = read_feats_structure(features_path)

    # Where to save embeddings
    embedding_dir = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/embd_identification'

    # Perform the enrollment and save the results
    enroll_per_spk(use_cuda, test_frames, model, DB_all, embedding_dir)
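
# A minimal sketch of running the enrollment directly; the __main__ guard
# below is an assumption, not part of the original snippet.
if __name__ == '__main__':
    main()
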
def load_dataset(M):
    train_DB = read_feats_structure(c.TRAIN_FEAT_DIR)
    print(f'\nTraining set: {len(train_DB)} utts')

    file_loader = read_MFB  # numpy array:(n_frames, n_dims)

    transform = transforms.Compose([
        TruncatedInputfromMFB(),  # numpy array:(1, n_frames, n_dims)
        ToTensorInput()  # torch tensor:(1, n_dims, n_frames)
    ])
    transform_T = ToTensorDevInput()

    speaker_list = sorted(set(
        train_DB['speaker_id']))  # len(speaker_list) == n_speakers
    spk_to_idx = {spk: i for i, spk in enumerate(speaker_list)}

    train_dataset = contrastiveDataset(DB=train_DB,
                                       loader=file_loader,
                                       transform=transform,
                                       spk_to_idx=spk_to_idx,
                                       spk_list=speaker_list,
                                       M=M)

    n_classes = len(speaker_list)  # number of distinct speakers (240 here)
    print('\nNumber of classes (speakers):\n{}\n'.format(n_classes))
    return train_dataset, n_classes
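
# Illustrative only: _demo_load_dataset and the batch size below are
# hypothetical, assuming the dataset yields (input, label) pairs that a
# standard torch.utils.data.DataLoader can batch.
def _demo_load_dataset():
    from torch.utils.data import DataLoader
    train_dataset, n_classes = load_dataset(M=2)  # M utterances per speaker
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    inputs, labels = next(iter(train_loader))  # pull one illustrative batch
    print(inputs.shape, labels.shape, n_classes)
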
def split_enroll_and_test(dataroot_dir):
    DB_all = read_feats_structure(dataroot_dir)
    # Select enrollment vs. test utterances by filename
    # (regex=False so the '.' in the pattern is matched literally)
    enroll_DB = DB_all[DB_all['filename'].str.contains('enroll.p', regex=False)]
    test_DB = DB_all[DB_all['filename'].str.contains('test.p', regex=False)]
    
    # Reset the index
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
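
# Illustrative only: _demo_split is hypothetical and its path is a
# placeholder; it assumes the directory holds '*enroll.p' and '*test.p'
# feature files, as the filename filters above expect.
def _demo_split(dataroot_dir='/path/to/test_features'):
    enroll_DB, test_DB = split_enroll_and_test(dataroot_dir)
    print('%d enrollment utts, %d test utts' % (len(enroll_DB), len(test_DB)))
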
def split_train_dev(train_feat_dir, valid_ratio):
    train_valid_DB = read_feats_structure(train_feat_dir)
    total_len = len(train_valid_DB)  # e.g. 148642 utterances
    valid_len = int(total_len * valid_ratio / 100.)  # valid_ratio is a percentage
    train_len = total_len - valid_len
    shuffled_train_valid_DB = train_valid_DB.sample(frac=1).reset_index(drop=True)
    # Split the DB into train and valid set
    train_DB = shuffled_train_valid_DB.iloc[:train_len]
    valid_DB = shuffled_train_valid_DB.iloc[train_len:]
    # Reset the index
    train_DB = train_DB.reset_index(drop=True)
    valid_DB = valid_DB.reset_index(drop=True)
    print('\nTraining set %d utts (%0.1f%%)' % (train_len, (train_len / total_len) * 100))
    print('Validation set %d utts (%0.1f%%)' % (valid_len, (valid_len / total_len) * 100))
    print('Total %d utts' % total_len)
    
    return train_DB, valid_DB
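
# Illustrative only: _demo_split_train_dev is hypothetical; it assumes the
# project config module `c` defines TRAIN_FEAT_DIR, as used in load_dataset.
def _demo_split_train_dev():
    # Hold out 10% of the training utterances for validation
    train_DB, valid_DB = split_train_dev(c.TRAIN_FEAT_DIR, valid_ratio=10)
    return train_DB, valid_DB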