Exemplo n.º 1
0
    def test_write_segments(self, writer, tmpdir):
        """Save the sample dataset and verify each row of the ``segments`` file."""
        target_dir = tmpdir.strpath
        writer.save(resources.create_dataset(), target_dir)

        rows = textfile.read_separated_lines(
            os.path.join(target_dir, 'segments'),
            separator=' ',
            max_columns=4)

        # Expected column values per row. Values wrapped in pytest.approx are
        # compared with tolerance, the plain numbers are compared exactly.
        expected = [
            ('utt-1', 'wav-1', 0, -1),
            ('utt-2', 'wav_2', 0, -1),
            ('utt-3', 'wav_3', pytest.approx(0), pytest.approx(1.5)),
            ('utt-4', 'wav_3', pytest.approx(1.5), pytest.approx(2.5)),
            ('utt-5', 'wav_4', 0, -1),
        ]

        for index, (utt_col, wav_col, start, end) in enumerate(expected):
            row = rows[index]
            assert row[0] == utt_col
            assert row[1] == wav_col
            assert float(row[2]) == start
            assert float(row[3]) == end
Exemplo n.º 2
0
    def test_exports_wavs_from_container_tracks(self, writer, tmpdir):
        """Save a dataset whose audio was relocated into a single container.

        After saving, every track is expected as a wav file below
        ``<out_path>/audio`` with the expected duration. For ``wav-1`` the
        exported samples are also compared with the samples of the source track.
        """
        path = tmpdir.strpath
        container_ds_path = os.path.join(path, 'container_ds')
        out_path = os.path.join(path, 'export')

        ds = resources.create_dataset()
        ds.relocate_audio_to_single_container(container_ds_path)

        writer.save(ds, out_path)

        exported = {}

        for track_idx in ('wav-1', 'wav_2', 'wav_3', 'wav_4'):
            track_path = os.path.join(out_path, 'audio',
                                      '{}.wav'.format(track_idx))

            # Check the file first, so a missing export fails on this
            # assertion rather than while the track is being read.
            assert os.path.isfile(track_path)

            track = tracks.FileTrack(None, track_path)
            assert track.duration == pytest.approx(2.5951875)
            exported[track_idx] = track

        assert np.allclose(exported['wav-1'].read_samples(),
                           ds.tracks['wav-1'].read_samples(),
                           atol=1e-05)
Exemplo n.º 3
0
    def test_split_utterances_to_max_time(self):
        """Split with ``max_time=1.5`` and ``overlap=0.5`` and check the boundaries."""
        source = resources.create_dataset()
        split = source.split_utterances_to_max_time(max_time=1.5, overlap=0.5)

        assert split.num_tracks == source.num_tracks
        assert split.num_issuers == source.num_issuers
        assert split.num_utterances == 8

        half_len = 2.5951875 / 2.0
        first_end = half_len + 0.5
        second_start = half_len - 0.5
        open_end = float('inf')

        # (utterance id, expected start, expected end)
        expected = [
            ('utt-1_0', 0.0, first_end),
            ('utt-1_1', second_start, open_end),
            ('utt-2_0', 0.0, first_end),
            ('utt-2_1', second_start, open_end),
            ('utt-3', 0.0, 1.5),
            ('utt-4', 1.5, 2.5),
            ('utt-5_0', 0.0, first_end),
            ('utt-5_1', second_start, open_end),
        ]

        for utt_idx, start, end in expected:
            assert split.utterances[utt_idx].start == start
            assert split.utterances[utt_idx].end == end
Exemplo n.º 4
0
    def test_write_segments_absolute_times(self, writer, tmpdir):
        """Check the ``segments`` file written with ``use_absolute_times=True``."""
        # The fixture argument is replaced by a writer configured for this test.
        writer = io.KaldiWriter(use_absolute_times=True)
        target_dir = tmpdir.strpath
        writer.save(resources.create_dataset(), target_dir)

        rows = textfile.read_separated_lines(
            os.path.join(target_dir, 'segments'),
            separator=' ',
            max_columns=4)

        full_length = pytest.approx(2.5951875)

        # Expected column values per row. Plain numbers are compared exactly,
        # pytest.approx values with tolerance.
        expected = [
            ('spk-1-utt-1', 'wav-1', 0, full_length),
            ('spk-1-utt-2', 'wav_2', 0, full_length),
            ('spk-2-utt-3', 'wav_3', pytest.approx(0), pytest.approx(1.5)),
            ('spk-2-utt-4', 'wav_3', pytest.approx(1.5), pytest.approx(2.5)),
            ('spk-3-utt-5', 'wav_4', 0, full_length),
        ]

        for index, (utt_col, wav_col, start, end) in enumerate(expected):
            row = rows[index]
            assert row[0] == utt_col
            assert row[1] == wav_col
            assert float(row[2]) == start
            assert float(row[3]) == end
Exemplo n.º 5
0
    def test_process_features_online_with_frame_hop_size_change_stores_correct(self, processor, tmpdir):
        """With both mock scales set to 2.0 the output container stores doubled sizes."""
        dataset = resources.create_dataset()
        base_dir = tmpdir.strpath

        source_path = os.path.join(base_dir, 'in_feats')
        target_path = os.path.join(base_dir, 'out_feats')

        source = containers.FeatureContainer(source_path)
        frames = np.arange(30).reshape(5, 6)

        with source:
            source.sampling_rate = 16000
            source.frame_size = 400
            source.hop_size = 160

            # Every utterance gets the same feature matrix.
            for utt_idx in dataset.utterances.keys():
                source.set(utt_idx, frames)

        processor.mock_frame_size_scale = 2.0
        processor.mock_hop_size_scale = 2.0
        processor.process_features_online(dataset, source, target_path)

        result = containers.FeatureContainer(target_path)

        with result:
            assert result.frame_size == 800
            assert result.hop_size == 320
Exemplo n.º 6
0
    def test_process_features(self, processor, tmpdir):
        """Process a feature container and expect the same matrix for every utterance."""
        dataset = resources.create_dataset()
        base_dir = tmpdir.strpath

        source_path = os.path.join(base_dir, 'in_feats')
        target_path = os.path.join(base_dir, 'out_feats')

        source = containers.FeatureContainer(source_path)
        frames = np.arange(30).reshape(5, 6)

        with source:
            source.sampling_rate = 16000
            source.frame_size = 400
            source.hop_size = 160

            for utt_idx in dataset.utterances.keys():
                source.set(utt_idx, frames)

        processor.process_features(dataset, source, target_path)

        result = containers.FeatureContainer(target_path)

        with result:
            assert len(result.keys()) == 5

            for utt_idx in ('utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5'):
                stored = result.get(utt_idx, mem_map=False)
                assert np.array_equal(stored, frames)
Exemplo n.º 7
0
def sample_multi_frame_dataset(tmpdir):
    """Build a ``MultiFrameDataset`` over two opened containers with sample data.

    The input container holds 4 columns per row and the target container
    2 columns per row. Both use the same number of rows per utterance.
    """
    base_dir = tmpdir.strpath
    dataset = resources.create_dataset()

    inputs = containers.Container(os.path.join(base_dir, 'inputs.hdf5'))
    targets = containers.Container(os.path.join(base_dir, 'targets.hdf5'))

    inputs.open()
    targets.open()

    # (utterance id, number of rows)
    rows_per_utt = (
        ('utt-1', 15),
        ('utt-2', 20),
        ('utt-3', 11),
        ('utt-4', 3),
        ('utt-5', 4),
    )

    for utt_idx, num_rows in rows_per_utt:
        inputs.set(utt_idx, np.arange(num_rows * 4).reshape(num_rows, 4))

    for utt_idx, num_rows in rows_per_utt:
        targets.set(utt_idx, np.arange(num_rows * 2).reshape(num_rows, 2))

    return feeding.MultiFrameDataset(dataset, [inputs, targets], 4)
Exemplo n.º 8
0
    def test_process_features_online_with_given_chunk_size(self, processor, tmpdir):
        """Process 15-row features online with ``chunk_size=4`` and check the chunks."""
        dataset = resources.create_dataset()
        base_dir = tmpdir.strpath

        source_path = os.path.join(base_dir, 'in_feats')
        target_path = os.path.join(base_dir, 'out_feats')

        source = containers.FeatureContainer(source_path)
        frames = np.arange(90).reshape(15, 6)

        with source:
            source.sampling_rate = 16000
            source.frame_size = 400
            source.hop_size = 160

            for utt_idx in dataset.utterances.keys():
                source.set(utt_idx, frames)

        processor.process_features_online(dataset, source, target_path, chunk_size=4)

        result = containers.FeatureContainer(target_path)

        # Four recorded calls are expected for each of the five utterances.
        # The first chunk is full, the fourth one holds the remaining rows.
        calls = processor.called_with_data
        assert len(calls) == 4 * 5
        assert calls[0].shape == (4, 6)
        assert calls[3].shape == (3, 6)

        with result:
            assert len(result.keys()) == 5

            for utt_idx in ('utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5'):
                stored = result.get(utt_idx, mem_map=False)
                assert np.array_equal(stored, frames)
Exemplo n.º 9
0
    def test_write_segments_no_speaker_prefix(self, tmpdir):
        """Check ``segments`` written with ``prefix_utterances_with_speaker=False``."""
        kaldi_writer = io.KaldiWriter(prefix_utterances_with_speaker=False)
        target_dir = tmpdir.strpath
        kaldi_writer.save(resources.create_dataset(), target_dir)

        rows = textfile.read_separated_lines(
            os.path.join(target_dir, 'segments'),
            separator=' ',
            max_columns=4)

        # Expected column values per row. Plain numbers are compared exactly,
        # pytest.approx values with tolerance.
        expected = [
            ('utt-1', 'wav-1', 0, -1),
            ('utt-2', 'wav_2', 0, -1),
            ('utt-3', 'wav_3', pytest.approx(0), pytest.approx(1.5)),
            ('utt-4', 'wav_3', pytest.approx(1.5), pytest.approx(2.5)),
            ('utt-5', 'wav_4', 0, -1),
        ]

        for index, (utt_col, wav_col, start, end) in enumerate(expected):
            row = rows[index]
            assert row[0] == utt_col
            assert row[1] == wav_col
            assert float(row[2]) == start
            assert float(row[3]) == end
Exemplo n.º 10
0
    def test_write_utt2spk(self, writer, tmpdir):
        """Check the rows of ``utt2spk``, including an utterance added without issuer."""
        dataset = resources.create_dataset()

        # Extra utterance created without an issuer; its row is expected to
        # carry the utterance id in both columns.
        dataset.new_file('/random/path', 'wav-33')
        dataset.new_utterance('utt-23', 'wav-33')

        target_dir = tmpdir.strpath
        writer.save(dataset, target_dir)

        rows = textfile.read_separated_lines(
            os.path.join(target_dir, 'utt2spk'),
            separator=' ',
            max_columns=2)

        expected = [
            ('spk-1-utt-1', 'spk-1'),
            ('spk-1-utt-2', 'spk-1'),
            ('spk-2-utt-3', 'spk-2'),
            ('spk-2-utt-4', 'spk-2'),
            ('spk-3-utt-5', 'spk-3'),
            ('utt-23', 'utt-23'),
        ]

        for index, (first_col, second_col) in enumerate(expected):
            assert rows[index][0] == first_col
            assert rows[index][1] == second_col
Exemplo n.º 11
0
    def test_parse(self):
        """Parse a textual subview definition and check the resulting filter."""
        definition = 'matching_utterance_ids\ninclude,utt-1,utt-3'
        dataset = resources.create_dataset()

        parsed = subview.Subview.parse(definition, corpus=dataset)
        criteria = parsed.filter_criteria

        assert len(criteria) == 1
        assert criteria[0].utterance_idxs == {'utt-1', 'utt-3'}
Exemplo n.º 12
0
    def test_process_features_online_ignores_none(self, processor, tmpdir):
        """Online feature processing must not fail when ``process_frames`` returns None."""
        dataset = resources.create_dataset()
        base_dir = tmpdir.strpath

        source_path = os.path.join(base_dir, 'in_feats')
        target_path = os.path.join(base_dir, 'out_feats')

        source = containers.FeatureContainer(source_path)
        frames = np.arange(90).reshape(15, 6)

        with source:
            source.sampling_rate = 16000
            source.frame_size = 400
            source.hop_size = 160

            for utt_idx in dataset.utterances:
                source.set(utt_idx, frames)

        # Replace the frame processing with a stub that yields nothing.
        processor.process_frames = lambda *args, **kwargs: None
        processor.process_features_online(
            dataset, source, target_path, chunk_size=4)

        # Reaching this point without an exception is the actual check.
        assert True
Exemplo n.º 13
0
    def test_save_subset_dev(self, writer, tmpdir):
        """Check header and data records of the ``dev.csv`` written by the writer."""
        ds = resources.create_dataset()
        out_dir = tmpdir.strpath
        writer.save(ds, out_dir)

        dev_csv = os.path.join(out_dir, 'dev.csv')
        assert os.path.isfile(dev_csv)

        records = textfile.read_separated_lines(dev_csv, separator=',')
        assert len(records) == 3

        # HEADER
        header = records[0]
        assert len(header) == 3
        assert header[1] == 'wav_filesize'
        assert header[2] == 'transcript'

        # DATA RECORDS, indexed by the first column
        by_path = {fields[0]: (fields[1], fields[2]) for fields in records[1:]}

        # (key in the csv, utterance id, expected value of the size column)
        expected = [
            (os.path.join(out_dir, 'audio', 'utt-4.wav'), 'utt-4', '32044'),
            (ds.utterances['utt-5'].track.path, 'utt-5', '83090'),
        ]

        for wav_path, utt_idx, filesize in expected:
            entry = by_path[wav_path]
            label_list = ds.utterances[utt_idx].label_lists[
                corpus.LL_WORD_TRANSCRIPT]

            assert len(entry) == 2
            assert entry[0] == filesize
            assert entry[1] == label_list.labels[0].value
Exemplo n.º 14
0
    def test_save(self, writer, tmpdir):
        """Save the sample dataset and check that the expected files exist."""
        target_dir = tmpdir.strpath
        writer.save(resources.create_dataset(), target_dir)

        written = os.listdir(target_dir)

        for file_name in ('segments', 'text', 'utt2spk', 'wav.scp'):
            assert file_name in written
Exemplo n.º 15
0
    def test_process_corpus_online_sets_container_attributes(self, processor, tmpdir):
        """The returned container carries frame size, hop size and sampling rate."""
        dataset = resources.create_dataset()
        target_path = os.path.join(tmpdir.strpath, 'feats')

        result = processor.process_corpus_online(
            dataset, target_path, frame_size=4096, hop_size=2048)

        with result:
            assert result.frame_size == 4096
            assert result.hop_size == 2048
            assert result.sampling_rate == 16000
Exemplo n.º 16
0
    def test_all_tokens_returns_only_from_selected_label_lists(self):
        """Tokens of a label-list that is not selected must not be returned."""
        dataset = resources.create_dataset()

        # Extra label-list with idx 'test'; it is not part of the selection below.
        extra = annotations.LabelList(
            idx='test',
            labels=[annotations.Label('what can he do')])
        dataset.utterances['utt-1'].set_label_list(extra)

        selected = [audiomate.corpus.LL_WORD_TRANSCRIPT]
        tokens = dataset.all_tokens(label_list_ids=selected)

        assert tokens == {'who', 'am', 'i', 'are', 'is', 'he', 'you', 'she', 'they'}
Exemplo n.º 17
0
    def test_all_tokens_with_custom_delimiter(self):
        """Tokens of the 'test' label-list are collected using ',' as delimiter."""
        dataset = resources.create_dataset()

        extra = annotations.LabelList(
            idx='test',
            labels=[annotations.Label('a, b, a, c')])
        dataset.utterances['utt-1'].set_label_list(extra)

        tokens = dataset.all_tokens(delimiter=',', label_list_ids=['test'])

        assert tokens == {'a', 'b', 'c'}
Exemplo n.º 18
0
    def test_process_corpus_online_ignore_returning_none(self, processor, tmpdir):
        """Online corpus processing must not fail when ``process_frames`` returns None."""
        dataset = resources.create_dataset()
        target_path = os.path.join(tmpdir.strpath, 'feats')

        # Replace the frame processing with a stub that yields nothing.
        processor.process_frames = lambda *args, **kwargs: None
        processor.process_corpus_online(
            dataset, target_path, frame_size=4096, hop_size=2048)

        # Reaching this point without an exception is the actual check.
        assert True
Exemplo n.º 19
0
    def test_validate(self):
        """Only 'utt-4' is expected to be reported by the ratio validator."""
        ds = resources.create_dataset()
        ll_idx = corpus.LL_WORD_TRANSCRIPT

        # Overwrite the value of the first label of two utterances.
        new_values = (
            ('utt-3', 'max length here 11'),
            ('utt-4', 'too long here'),
        )
        for utt_idx, text in new_values:
            ds.utterances[utt_idx].label_lists[ll_idx][0].value = text

        validator = validation.UtteranceTranscriptionRatioValidator(10, ll_idx)
        outcome = validator.validate(ds)

        assert not outcome.passed
        assert len(outcome.invalid_utterances) == 1
        assert 'utt-4' in outcome.invalid_utterances.keys()
Exemplo n.º 20
0
    def test_init_with_corpus_view(self):
        """An iterator built on a subview exposes the utterance ids of that subview."""
        dataset = resources.create_dataset()

        id_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs={'utt-1', 'utt-2', 'utt-4'})
        view = subset.Subview(dataset, filter_criteria=[id_filter])

        iterator = feeding.DataIterator(view, [containers.Container('blub')])

        assert set(iterator.utt_ids) == set(view.utterances.keys())
Exemplo n.º 21
0
    def test_save(self):
        """Save the sample dataset and check that the expected files exist.

        The temporary directory is removed in a ``finally`` clause, so it is
        cleaned up even when saving raises or one of the assertions fails.
        """
        ds = resources.create_dataset()
        path = tempfile.mkdtemp()

        try:
            self.writer.save(ds, path)

            written = os.listdir(path)

            for file_name in ('segments', 'text', 'utt2spk', 'spk2gender', 'wav.scp'):
                assert file_name in written
        finally:
            shutil.rmtree(path, ignore_errors=True)
Exemplo n.º 22
0
    def test_init_with_corpus(self, tmpdir):
        """A dataset built on the full corpus lists all five utterance ids.

        The container is closed in a ``finally`` clause so it does not stay
        open after the test, whether the assertion passes or fails.
        """
        utt_ids = ['utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5']

        c = containers.Container(os.path.join(tmpdir.strpath, 'test.h5'))
        c.open()

        try:
            for utt_idx in utt_ids:
                c.set(utt_idx, data=np.arange(20))

            corpus = resources.create_dataset()
            it = feeding.Dataset(corpus, [c])
            assert it.utt_ids == utt_ids
        finally:
            c.close()
Exemplo n.º 23
0
    def test_validate(self):
        """Utterances with too few labels or no label-list are reported with a reason."""
        ds = resources.create_dataset()
        ll_idx = corpus.LL_WORD_TRANSCRIPT

        # 'utt-3' keeps the label-list but without labels,
        # 'utt-4' loses the label-list completely.
        ds.utterances['utt-3'].label_lists[ll_idx].labels = []
        del ds.utterances['utt-4'].label_lists[ll_idx]

        validator = validation.LabelCountValidator(1, ll_idx)
        outcome = validator.validate(ds)
        invalid = outcome.invalid_utterances

        assert not outcome.passed
        assert len(invalid) == 2
        assert invalid['utt-3'] == 'Only {} labels'.format(0)
        assert invalid['utt-4'] == 'No label-list {}'.format(ll_idx)
Exemplo n.º 24
0
    def test_process_corpus_online_with_frame_hop_size_change_stores_correct(self, processor, tmpdir):
        """With mock scales 0.5 and 0.25 the container stores the scaled sizes.

        The container is used as a context manager, so it is closed again
        even when one of the assertions fails.
        """
        ds = resources.create_dataset()
        feat_path = os.path.join(tmpdir.strpath, 'feats')

        processor.mock_frame_size_scale = 0.5
        processor.mock_hop_size_scale = 0.25
        processor.process_corpus_online(ds, feat_path, frame_size=4096, hop_size=2048)

        fc = containers.FeatureContainer(feat_path)

        with fc:
            assert fc.frame_size == 2048
            assert fc.hop_size == 512
Exemplo n.º 25
0
def ds():
    """Return the sample dataset with the track paths pointed at resource files.

    The tracks are assigned, in order, a wav, an mp3, a flac and another wav file.
    """
    dataset = resources.create_dataset()

    # (track id, replacement path)
    replacements = (
        ('wav-1', resources.sample_wav_file('wav_1.wav')),
        ('wav_2', resources.get_resource_path(
            ('audio_formats', 'mp3_2_44_1k_16b.mp3'))),
        ('wav_3', resources.get_resource_path(
            ('audio_formats', 'flac_1_16k_16b.flac'))),
        ('wav_4', resources.sample_wav_file('wav_4.wav')),
    )

    for track_idx, file_path in replacements:
        dataset.tracks[track_idx].path = file_path

    return dataset
Exemplo n.º 26
0
    def test_process_corpus_online(self, processor, tmpdir):
        """Process the corpus online and check the stored shape per utterance."""
        dataset = resources.create_dataset()
        target_path = os.path.join(tmpdir.strpath, 'feats')

        processor.process_corpus_online(
            dataset, target_path, frame_size=4096, hop_size=2048)

        # (utterance id, expected size of the first dimension)
        expected_rows = (
            ('utt-1', 20),
            ('utt-2', 20),
            ('utt-3', 11),
            ('utt-4', 7),
            ('utt-5', 20),
        )

        with h5py.File(target_path, 'r') as stored:
            assert set(stored.keys()) == set(dataset.utterances.keys())

            for utt_idx, num_rows in expected_rows:
                assert stored[utt_idx].shape == (num_rows, 4096)
Exemplo n.º 27
0
    def test_validate(self):
        """Only 'utt-4' is expected to be reported by the ratio validator."""
        ds = resources.create_dataset()
        ll_idx = corpus.LL_WORD_TRANSCRIPT

        # Replace the transcript label-list of two utterances.
        new_transcripts = (
            ('utt-3', 'max length here 11'),
            ('utt-4', 'too long here'),
        )
        for utt_idx, text in new_transcripts:
            replacement = annotations.LabelList.create_single(text, idx=ll_idx)
            ds.utterances[utt_idx].set_label_list(replacement)

        validator = validation.UtteranceTranscriptionRatioValidator(10, ll_idx)
        outcome = validator.validate(ds)

        assert not outcome.passed
        assert len(outcome.invalid_utterances) == 1
        assert 'utt-4' in outcome.invalid_utterances.keys()
Exemplo n.º 28
0
    def test_init_with_corpus_view(self, tmpdir):
        """A dataset built on a subview lists only the utterance ids of that subview.

        The container is closed in a ``finally`` clause so it does not stay
        open after the test, whether the assertion passes or fails.
        """
        c = containers.Container(os.path.join(tmpdir.strpath, 'test.h5'))
        c.open()

        try:
            for utt_idx in ('utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5'):
                c.set(utt_idx, data=np.arange(20))

            corpus = resources.create_dataset()
            subview = subset.Subview(
                corpus,
                filter_criteria=[
                    subset.MatchingUtteranceIdxFilter(
                        utterance_idxs={'utt-1', 'utt-2', 'utt-4'})
                ])

            it = feeding.Dataset(subview, [c])
            assert it.utt_ids == ['utt-1', 'utt-2', 'utt-4']
        finally:
            c.close()
Exemplo n.º 29
0
    def test_write_wav_scp(self, writer, tmpdir):
        """Check that ``wav.scp`` maps every track id to its absolute wav path."""
        target_dir = tmpdir.strpath
        writer.save(resources.create_dataset(), target_dir)

        rows = textfile.read_separated_lines(
            os.path.join(target_dir, 'wav.scp'),
            separator=' ',
            max_columns=2)

        wav_dir = os.path.abspath(resources.get_resource_path(['wav_files']))

        # (track id, file name below the wav resource directory)
        expected = [
            ('wav-1', 'wav_1.wav'),
            ('wav_2', 'wav_2.wav'),
            ('wav_3', 'wav_3.wav'),
            ('wav_4', 'wav_4.wav'),
        ]

        for index, (track_idx, file_name) in enumerate(expected):
            assert rows[index][0] == track_idx
            assert rows[index][1] == os.path.join(wav_dir, file_name)
Exemplo n.º 30
0
    def test_save_spk2gender(self, writer, tmpdir):
        """Check the ``spk2gender`` file written with ``create_spk2gender=True``."""
        # The fixture argument is replaced by a writer configured for this test.
        writer = io.KaldiWriter(create_spk2gender=True)
        target_dir = tmpdir.strpath
        writer.save(resources.create_dataset(), target_dir)

        assert 'spk2gender' in os.listdir(target_dir)

        rows = textfile.read_separated_lines(
            os.path.join(target_dir, 'spk2gender'),
            separator=' ',
            max_columns=2)

        expected = [
            ('spk-1', 'm'),
            ('spk-2', 'f'),
            ('spk-3', 'm'),
        ]

        for index, (speaker, gender) in enumerate(expected):
            assert rows[index][0] == speaker
            assert rows[index][1] == gender