Example #1
    def test_load_sphere(self):
        filepath = get_root_dir() / 'data' / 'timit' / 'train' / 'dr1' / 'mwar0' / 'sx415.wav'
        samples, sample_rate = load_sphere(filepath)
        self.assertEqual(len(samples.shape), 1)
        self.assertEqual(samples.shape[0], 38810)
        self.assertEqual(sample_rate, 16000)
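
Note: TIMIT distributes its audio as NIST SPHERE files even though the extension is .wav, so an ordinary WAV reader will not open them (hence the dedicated load_sphere above). A minimal sketch of such a loader, assuming the soundfile package is available (libsndfile auto-detects the NIST header); the project's actual implementation may differ:

import soundfile as sf

def load_sphere_sketch(filepath):
    # libsndfile recognizes the NIST SPHERE header despite the .wav extension
    samples, sample_rate = sf.read(str(filepath), dtype='int16')
    return samples, sample_rate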
Example #2
def get_phone_mapping():
    """
    Generates:
    - dictionary (origin phone -> train label), to load targets for the model from transcriptions. Different phones can
        be mapped to the same label, as a subset of phones is used for training (48 phones).
    - dictionary (train label -> test label), to evaluate the model on a subset of the training phones (39 phones).
    - dictionary (test label -> test phone), to print the phone names (e.g. in a confusion matrix)

    The training and test phone subsets are chosen according to standard recipes for TIMIT.

    :return: tuple (phone_labels, evaluation_mapping, test_label_to_test_phone), containing the described dictionaries.
    """
    # read file
    filepath = get_root_dir() / 'data' / 'timit_phones_60-48-39.map'
    with filepath.open() as csv_file:
        data_frame = pd.read_csv(csv_file, sep='\t')
    data_frame = data_frame.dropna()

    # load phone mappings
    origin_to_train_phone = {
        op: tp
        for op, tp in zip(data_frame['origin'], data_frame['train'])
    }
    origin_to_test_phone = {
        op: tp
        for op, tp in zip(data_frame['origin'], data_frame['test'])
    }

    # generate labels (sorted so that multiple calls always generate the same dictionaries)
    train_labels = {
        phone: label
        for label, phone in enumerate(sorted(data_frame['train'].unique()))
    }
    test_labels = {
        phone: label
        for label, phone in enumerate(sorted(data_frame['test'].unique()))
    }

    # get phone labels (origin phone -> train label, to generate targets from transcriptions)
    origin_phone_to_train_label = {}
    for origin_phone in data_frame['origin']:
        train_phone = origin_to_train_phone[origin_phone]
        origin_phone_to_train_label[origin_phone] = train_labels[train_phone]

    # get evaluation mapping (train label -> test label, to evaluate the model using a subset of phones)
    train_label_to_test_label = {}
    for origin_phone in data_frame['origin']:
        test_phone = origin_to_test_phone[origin_phone]
        train_label = origin_phone_to_train_label[origin_phone]
        train_label_to_test_label[train_label] = test_labels[test_phone]

    # get test class names (for confusion matrix)
    test_label_to_test_phone = {
        value: key
        for key, value in test_labels.items()
    }

    return origin_phone_to_train_label, train_label_to_test_label, test_label_to_test_phone
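
A brief usage sketch of the three dictionaries returned by get_phone_mapping, following the docstring (the phone 'ux' is just an illustrative entry assumed to appear in the 'origin' column of the mapping file):

phone_labels, evaluation_mapping, test_label_to_test_phone = get_phone_mapping()

train_label = phone_labels['ux']              # origin phone -> train label (48-phone set)
test_label = evaluation_mapping[train_label]  # train label -> test label (39-phone set)
test_phone = test_label_to_test_phone[test_label]  # test label -> phone name, e.g. for a confusion matrix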
Example #3
def get_core_test_speakers():
    """
    Returns a dictionary (dialect -> list of speaker_id) for the core test set.

    :return: dictionary (dialect -> list of speaker_id)
    """
    filepath = get_root_dir() / 'data' / 'timit_core_test.json'
    with filepath.open() as json_file:
        return json.load(json_file)
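
For instance, the returned dictionary can be iterated to count the core test speakers per dialect region (keys like 'dr1' are assumed from the directory layout seen in the other examples):

for dialect, speaker_ids in get_core_test_speakers().items():
    print(dialect, len(speaker_ids))  # e.g. 'dr1' followed by its number of core test speakers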
Example #4
    def test_extract_features(self):
        filepath = get_root_dir() / 'data' / 'timit' / 'train' / 'dr1' / 'mwar0' / 'sx415.wav'
        win_len = 0.03
        win_shift = 0.01
        samples, sample_rate = load_sphere(filepath)
        features = extract_features(samples, sample_rate, win_len, win_shift)
        n_frames = 1 + round((samples.shape[0] - win_len * sample_rate) /
                             (win_shift * sample_rate))
        self.assertTrue(features.shape[0] - n_frames <= 1)
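
The expected frame count above follows the usual framing formula, 1 + round((n_samples - win_len * sr) / (win_shift * sr)). A quick worked example with the numbers used in these tests (the project's get_number_of_frames helper is assumed to round the same way):

def n_frames_sketch(n_samples, sample_rate, win_len, win_shift):
    # windows of win_len seconds, shifted by win_shift seconds, expressed in samples
    return 1 + round((n_samples - win_len * sample_rate) / (win_shift * sample_rate))

print(n_frames_sketch(38810, 16000, 0.03, 0.01))  # 241 (the full sx415.wav, 38810 samples)
print(n_frames_sketch(38720, 16000, 0.03, 0.01))  # 240, matching the label count asserted in Example #8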
Example #5
def load_data(dataset_path, core_test=True, force_preprocess=False):
    """
    Returns the training and test sets, containing features (13 MFCC + delta + delta-delta) and labels (phones encoded as
    integers).

    The split in training and test sets is the recommended one (see timit/readme.doc and timit/doc/testset.doc).

    :param dataset_path: path to the dataset. Since the TIMIT dataset is protected by copyright, it is not distributed
        with the package.
    :param core_test: whether to use the core test set (see timit/doc/testset.doc) instead of the complete test set
    :param force_preprocess: force to pre-process again, even if saved data can be loaded
    :return: tuple (train_set, test_set), where train_set and test_set are numpy arrays of utterances. Each utterance
        is a dictionary containing utterance info useful for normalization, feature vectors, and phone labels.
    """
    dataset_path = Path(dataset_path)
    if not dataset_path.is_dir():
        raise ValueError('Invalid dataset path')

    # training set
    filepath = get_root_dir() / 'data' / 'timit_train.npz'
    if filepath.is_file() and not force_preprocess:
        print('Loading training set...', end=' ')
        train_set = np.load(filepath, allow_pickle=True)['train_set']
        print('done')
    else:
        train_set = _preprocess_data(dataset_path / 'train')
        np.savez(filepath, train_set=train_set)

    # test set
    filename = 'timit_core_test.npz' if core_test else 'timit_test.npz'
    filepath = get_root_dir() / 'data' / filename
    if filepath.is_file() and not force_preprocess:
        print('Loading test set...', end=' ')
        test_set = np.load(filepath, allow_pickle=True)['test_set']
        print('done')
    else:
        test_set = _preprocess_data(dataset_path / 'test', core_test)
        np.savez(filepath, test_set=test_set)

    return train_set, test_set
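
A usage sketch for load_data based on the docstring above; the 'features', 'labels' and 'speaker_id' keys are the ones used in the other examples, any further utterance fields are not shown:

train_set, test_set = load_data('/path/to/timit', core_test=True)

utterance = train_set[0]
x = utterance['features']      # per-frame feature vectors (13 MFCC + delta + delta-delta)
y = utterance['labels']        # per-frame phone labels encoded as integers
spk = utterance['speaker_id']  # speaker identifier, used e.g. for validation splits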
Example #6
    def test_load_transcription(self):
        filepath = get_root_dir() / 'data' / 'timit' / 'train' / 'dr1' / 'fcjf0' / 'sa1.phn'
        transcription = load_transcription(filepath)
        self.assertTupleEqual(transcription[0], (0, 3050, 'h#'))
        self.assertTupleEqual(transcription[5], (8772, 9190, 'dcl'))
        self.assertTupleEqual(transcription[10], (12640, 14714, 'ah'))
        self.assertTupleEqual(transcription[15], (20417, 21199, 'q'))
        self.assertTupleEqual(transcription[20], (24229, 25566, 'ix'))
        self.assertTupleEqual(transcription[25], (31719, 33360, 'sh'))
        self.assertTupleEqual(transcription[30], (36326, 37556, 'axr'))
        self.assertTupleEqual(transcription[36], (44586, 46720, 'h#'))
Example #7
    def test_normalize(self):
        dataset_path = get_root_dir() / 'data' / 'timit'
        train_set, _ = timit.load_data(dataset_path)

        # test normalization on whole dataset
        normalized_train_set, _ = normalize(train_set, mode='full')
        x_train = np.concatenate(
            [utterance['features'] for utterance in normalized_train_set])
        mean = x_train.mean(axis=0)
        var = x_train.var(axis=0)
        for i in range(x_train.shape[1]):
            self.assertAlmostEqual(mean[i], 0)
            self.assertAlmostEqual(var[i], 1)
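
The assertions above imply that mode='full' standardizes every feature dimension using mean and variance computed over the whole dataset. A minimal sketch of that behaviour, assuming the second return value holds the fitted statistics (the project's normalize may differ and supports other modes):

def normalize_full_sketch(train_set):
    # global per-dimension mean/std over all frames of all utterances
    x = np.concatenate([utterance['features'] for utterance in train_set])
    mean, std = x.mean(axis=0), x.std(axis=0)
    for utterance in train_set:
        utterance['features'] = (utterance['features'] - mean) / std
    return train_set, (mean, std)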
Example #8
    def test_extract_labels(self):
        filepath = get_root_dir() / 'data' / 'timit' / 'train' / 'dr1' / 'mwar0' / 'sx415.wav'
        _, sample_rate = load_sphere(filepath)
        filepath = filepath.with_suffix('.phn')
        transcription = load_transcription(filepath)
        win_len = 0.03
        win_shift = 0.01
        n_frames = get_number_of_frames(38720, sample_rate, win_len, win_shift)
        labels = extract_labels(transcription, sample_rate, n_frames, win_len,
                                win_shift)
        self.assertEqual(len(set(labels)), 25)
        self.assertEqual(len(labels), 240)
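
One plausible way for extract_labels to yield exactly one label per frame, as asserted above, is to tag each frame with the phone whose interval contains the frame centre. This is only a sketch of the idea, not the project's code; phone_to_label stands in for a mapping such as the one from get_phone_mapping:

def extract_labels_sketch(transcription, sample_rate, n_frames, win_len, win_shift, phone_to_label):
    labels = []
    for i in range(n_frames):
        centre = int((i * win_shift + win_len / 2) * sample_rate)  # frame centre in samples
        # transcription entries are (start, end, phone) tuples, as in Example #6
        phone = next((p for start, end, p in transcription if start <= centre < end),
                     transcription[-1][2])
        labels.append(phone_to_label[phone])
    return labels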
Example #9
    def test_unlabel(self):
        dataset_path = get_root_dir() / 'data' / 'timit'
        train_set, _ = timit.load_data(dataset_path)
        n_total = len(train_set)

        unlabel(train_set, 0.7, seed=1)
        n_labeled = len(
            [utterance for utterance in train_set if 'labels' in utterance])
        n_unlabeled = len(
            [utterance for utterance in train_set if 'labels' not in utterance])

        self.assertEqual(n_labeled + n_unlabeled, n_total)
        self.assertTrue(n_labeled < n_unlabeled)
        self.assertEqual(n_labeled, 1104)
        self.assertEqual(n_unlabeled, 2592)
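
The counts checked above are consistent with unlabel removing the 'labels' key from a fixed fraction of the training data using a seeded random generator; the exact numbers (1104 labeled out of 3696) suggest the real function may unlabel whole speakers rather than individual utterances. A rough per-utterance sketch:

def unlabel_sketch(train_set, fraction, seed=None):
    # drop the 'labels' key from roughly `fraction` of the utterances, in place
    rng = np.random.RandomState(seed)
    for i in rng.choice(len(train_set), size=int(round(fraction * len(train_set))), replace=False):
        train_set[i].pop('labels', None)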
Example #10
def _split_validation_unique(train_set):
    doc_path = get_root_dir() / 'data' / 'spkrinfo_spkrsent.txt'

    # shuffle data
    with open(doc_path, 'r') as source:
        lines = [line for line in source]
        lines[-1] += '\n'
        lines = np.array(lines)
        np.random.shuffle(lines)

    # number of speakers still to select for each dialect, as [male, female]
    drs = [[2, 1], [2, 1], [2, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1]]
    new_speakers = []  # unique speakers
    sentence_ids = []  # sentences spoken by the unique speakers

    for line in lines:
        columns = line.split()
        # the speaker is "unique" if none of these sentences has been seen yet
        condition = all(sentence not in sentence_ids for sentence in columns[4:9])
        if columns[3] == 'TRN' and condition:
            dialect = int(columns[2]) - 1
            if drs[dialect][0] != 0 and columns[1] == 'M':  # male quota for this dialect not yet filled
                sentence_ids.extend(columns[4:12])
                new_speakers.append(columns[0])
                drs[dialect][0] -= 1
            elif drs[dialect][1] != 0 and columns[1] == 'F':  # female quota for this dialect not yet filled
                sentence_ids.extend(columns[4:12])
                new_speakers.append(columns[0])
                drs[dialect][1] -= 1

    pair_speakers = []  # unique speaker pairs (for the complete_valid_set)
    pair_sentence_ids = []  # sentences of the unique speaker pairs (for the complete_valid_set)

    for line in lines:
        columns = line.split()
        condition = all(sentence not in sentence_ids for sentence in columns[4:9])
        if columns[3] == 'TRN' and not condition:
            pair_speakers.append(columns[0])
            pair_sentence_ids.extend(columns[4:12])

    valid = []
    train = []
    complete_valid = []
    new_speakers = [x.lower() for x in new_speakers]
    pair_speakers = [x.lower() for x in pair_speakers]

    for utterance in train_set:
        if utterance['speaker_id'] in new_speakers:
            valid.append(utterance)
        if utterance['speaker_id'] in pair_speakers:
            complete_valid.append(utterance)
        if (utterance['speaker_id'] not in pair_speakers) and (
                utterance['speaker_id'] not in new_speakers):
            train.append(utterance)

    train = np.asarray(train)
    valid = np.asarray(valid)
    complete_valid = np.asarray(complete_valid)

    return train, valid, complete_valid
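
Since _split_validation_unique shuffles the speaker list with np.random.shuffle and takes no seed argument, the resulting split changes between runs unless the global NumPy random state is fixed first; a usage sketch:

np.random.seed(42)  # make the speaker shuffle, and therefore the split, reproducible
train, valid, complete_valid = _split_validation_unique(train_set)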