def test_find_textgrid_in_alternate_dir(self):
    """Find the TextGrid through ``tgdir`` when it is not beside the wav.

    The wav and its TextGrid are copied into two separate temporary
    directories and only the TextGrid directory is handed to ``SoundFile``.
    """
    wav_name = 'beijing_f3_50_a.wav'
    grid_name = 'beijing_f3_50_a.TextGrid'
    wav_dir = self.tmpdir()
    grid_dir = self.tmpdir()
    shutil.copy(data_file_path(wav_name), wav_dir)
    shutil.copy(data_file_path(grid_name), grid_dir)
    wav_path = os.path.join(wav_dir, wav_name)
    sound = SoundFile(wav_path, tgdir=grid_dir)
    self.assertNotEqual(sound.textgrid, None)
    expected_tgpath = os.path.join(grid_dir, grid_name)
    self.assertEqual(sound.tgpath, expected_tgpath)
def test_find_textgrid_using_full_spec(self):
    """Find the TextGrid when both ``tgdir`` and ``tgfn`` are given.

    The TextGrid is copied under the unrelated name ``foo.bar`` into its
    own temporary directory, so it can only be found via the explicit
    directory and file name arguments.
    """
    wav_name = 'beijing_f3_50_a.wav'
    grid_name = 'beijing_f3_50_a.TextGrid'
    wav_dir = self.tmpdir()
    grid_dir = self.tmpdir()
    shutil.copy(data_file_path(wav_name), wav_dir)
    renamed_grid = os.path.join(grid_dir, 'foo.bar')
    shutil.copy(data_file_path(grid_name), renamed_grid)
    wav_path = os.path.join(wav_dir, wav_name)
    sound = SoundFile(wav_path, tgdir=grid_dir, tgfn='foo.bar')
    self.assertNotEqual(sound.textgrid, None)
    self.assertEqual(sound.tgpath, renamed_grid)
def test_load_wav_file(self):
    """SoundFile exposes the same path, samples and rate as ``wavread``."""
    wav_path = data_file_path('beijing_f3_50_a.wav')
    sound = SoundFile(wav_path)
    self.assertEqual(sound.wavpath, wav_path)
    # Read the same file directly to obtain the reference values.
    ref_data, ref_fs = wavread(wav_path)
    np.testing.assert_array_equal(ref_data, sound.wavdata)
    self.assertEqual(ref_fs, sound.fs)
def test_output_filepath(self):
    """``-o`` sends the CLI output to the named file (589 lines here)."""
    out_path = os.path.join(self.tmpdir(), 'output.txt')
    argv = [
        '--include-f0',
        '-o', out_path,
        data_file_path('beijing_f3_50_a.wav'),
    ]
    CLI(argv).process()
    with open(out_path) as f:
        written = f.readlines()
    self.assertEqual(len(written), 589)
def test_include_empty_lables(self):
    """``--include-empty-labels`` gives 2347 lines, 101 mentioning C1."""
    argv = [
        '--measurements', 'snackF0',
        '--include-empty-labels',
        data_file_path('beijing_f3_50_a.wav'),
    ]
    lines = self._CLI_output(argv)
    self.assertEqual(len(lines), 2347)
    c1_rows = [row for row in lines if 'C1' in row]
    self.assertEqual(len(c1_rows), 101)
def pitch_algo3_as_window_size(self, pitch_algo, v100):
    """Check output row 100 for ``pitch_algo`` run with ``--window-size 10``.

    ``v100`` is appended to ``self.line100_prefix`` to build the row that
    ``lines[100]`` is compared against.
    """
    argv = ['--f0', pitch_algo, '--include-F0']
    argv += ['--window-size', '10']
    argv.append(data_file_path('beijing_f3_50_a.wav'))
    lines = self._CLI_output(argv)
    actual_row = lines[100]
    expected_row = self.line100_prefix + [v100]
    self.assertEqual(actual_row, expected_row)
def pitch_algo5_as_max_f0(self, pitch_algo, v100):
    """Check output row 100 for ``pitch_algo`` run with ``--max-f0 200``.

    ``v100`` is appended to ``self.line100_prefix`` to build the row that
    ``lines[100]`` is compared against.
    """
    argv = ['--f0', pitch_algo, '--include-F0']
    argv += ['--max-f0', '200']
    argv.append(data_file_path('beijing_f3_50_a.wav'))
    lines = self._CLI_output(argv)
    actual_row = lines[100]
    expected_row = self.line100_prefix + [v100]
    self.assertEqual(actual_row, expected_row)
def pitch_algo1_as_default_settings(self, pitch_algo, line_count, v100):
    """Check line count and row 100 for ``pitch_algo`` with no extra options.

    ``line_count`` is the expected number of output lines; ``v100`` is
    appended to ``self.line100_prefix`` to build the expected row 100.
    """
    argv = ['--f0', pitch_algo, '--include-F0']
    argv.append(data_file_path('beijing_f3_50_a.wav'))
    lines = self._CLI_output(argv)
    self.assertEqual(len(lines), line_count)
    actual_row = lines[100]
    expected_row = self.line100_prefix + [v100]
    self.assertEqual(actual_row, expected_row)
def pitch_algo2_as_frame_shift(self, pitch_algo):
    """Check that ``pitch_algo`` with ``--frame-shift 2`` yields 297 lines."""
    argv = ['--f0', pitch_algo, '--include-F0']
    argv += ['--frame-shift', '2']
    argv.append(data_file_path('beijing_f3_50_a.wav'))
    lines = self._CLI_output(argv)
    self.assertEqual(len(lines), 297)
def test_multiple_measurements(self):
    """Three measurements requested at once appear as the last columns."""
    requested = ['shrF0', 'snackF0', 'SHR']
    argv = [data_file_path('beijing_f3_50_a.wav'), '--measurements']
    lines = self._CLI_output(argv + requested)
    self.assertEqual(len(lines), 589)
    # The header row ends with the measurement names, in request order.
    header = lines[0]
    self.assertEqual(header[-3:], ['shrF0', 'snackF0', 'SHR'])
    first_data_row = lines[1]
    self.assertEqual(len(first_data_row), 8)
def test_wavread(self):
    """``wavread`` matches the stored expected rate and samples."""
    wav_path = data_file_path('beijing_f3_50_a.wav')
    samples, Fs = wavread(wav_path)
    expected = loadmat('beijing_f3_50_a-wavread-expected')
    self.assertEqual(Fs, expected['Fs'])
    # XXX may need to use allclose here instead of array_equal.
    if np.array_equal(samples, expected['y']):
        return
    # Arrays differ: compare as lists so the failure shows the values.
    self.assertEqual(list(samples), list(expected['y']))
def test_multiple_input_files(self):
    """Three wav files in one run give per-file row counts that add up."""
    argv = ['--measurements', 'snackF0', '--include-empty-labels']
    argv.append(data_file_path('beijing_f3_50_a.wav'))
    argv.append(data_file_path('beijing_m5_17_c.wav'))
    argv.append(data_file_path('hmong_f4_24_d.wav'))
    lines = self._CLI_output(argv)
    self.assertEqual(len(lines), 6121)
    # The first count is one less than the line count in the equivalent
    # single-file test because that test counts the header line and this
    # per-file filter does not.
    rows_per_file = (
        ('beijing_f3_50_a.wav', 2346),
        ('beijing_m5_17_c.wav', 1673),
        ('hmong_f4_24_d.wav', 2101),
    )
    for wav_name, expected_rows in rows_per_file:
        matching = [x for x in lines if wav_name in x]
        self.assertEqual(len(matching), expected_rows)
def test_snackF0(self):
    """The snackF0 measurement gives the expected row count per label."""
    argv = [data_file_path('beijing_f3_50_a.wav'), '--measurements', 'snackF0']
    lines = self._CLI_output(argv)
    self.assertEqual(len(lines), 589)
    rows_per_label = (
        ('C1', 101),
        ('V1', 209),
        ('C2', 119),
        ('V2', 159),
    )
    for label, expected_rows in rows_per_label:
        matching = [x for x in lines if label in x]
        self.assertEqual(len(matching), expected_rows)
def test_settings_default_file(self):
    """Run the CLI with ``CLI.settings_locs`` patched to a temp settings file.

    No ``--settings`` option is passed; the settings file is only reachable
    through the patched ``settings_locs`` list.  The run is expected to
    produce 2347 lines.
    """
    # NOTE(review): the whitespace inside this triple-quoted literal is
    # reproduced exactly as seen in the reviewed copy; confirm it matches
    # the intended file layout.
    settingsfn = self._make_file(""" include-empty-labels """)
    # presumably self.patch swaps the attribute for the duration of the
    # with-block; verify against the helper's definition.
    with self.patch(CLI, 'settings_locs', [settingsfn]):
        lines = self._CLI_output([
            data_file_path('beijing_f3_50_a.wav'),
            '--measurements', 'snackF0',
            ])
    self.assertEqual(len(lines), 2347)
def test_ignore_label(self):
    """``--ignore-label C2`` drops the C2 rows and leaves the others."""
    argv = [
        '--measurements', 'snackF0',
        '--ignore-label', 'C2',
        data_file_path('beijing_f3_50_a.wav'),
    ]
    lines = self._CLI_output(argv)
    # 589 lines without the option, minus the 119 rows labelled C2.
    self.assertEqual(len(lines), 589 - 119)
    rows_per_label = (
        ('C1', 101),
        ('V1', 209),
        ('C2', 0),
        ('V2', 159),
    )
    for label, expected_rows in rows_per_label:
        matching = [x for x in lines if label in x]
        self.assertEqual(len(matching), expected_rows)
def test_measurements_default_file(self):
    """Run the CLI with ``CLI.measurements_locs`` patched to a temp file.

    No ``--measurements`` option is passed; the measurement names are only
    reachable through the patched ``measurements_locs`` list.  The run is
    expected to give 589 lines, with 7 fields in line 1.
    """
    # NOTE(review): the whitespace inside this triple-quoted literal is
    # reproduced exactly as seen in the reviewed copy; confirm it matches
    # the intended file layout.
    measurefn = self._make_file(""" snackF0 shrF0 """)
    # presumably self.patch swaps the attribute for the duration of the
    # with-block; verify against the helper's definition.
    with self.patch(CLI, 'measurements_locs', [measurefn]):
        lines = self._CLI_output([
            data_file_path('beijing_f3_50_a.wav'),
            ])
    self.assertEqual(len(lines), 589)
    self.assertEqual(len(lines[1]), 7)
def test_settings(self):
    """Pass a settings file via ``--settings`` and check its effect.

    The file names ``include-empty-labels`` and ``ignore-label C2``.  The
    expected line count is 2347 - 119, and no line may contain 'C2'.
    """
    # NOTE(review): the whitespace inside this triple-quoted literal is
    # reproduced exactly as seen in the reviewed copy; confirm it matches
    # the intended file layout.
    settingsfn = self._make_file(""" include-empty-labels ignore-label C2 """)
    lines = self._CLI_output([
        '--settings', settingsfn,
        data_file_path('beijing_f3_50_a.wav'),
        '--measurements', 'snackF0',
        ])
    self.assertEqual(len(lines), 2347 - 119)
    self.assertEqual(len([x for x in lines if 'C2' in x]), 0)
def test_alternate_F0(self):
    """``--F0 shrF0 --include-F0`` adds shrF0 as the last output column."""
    argv = ['--F0', 'shrF0', '--include-F0']
    argv.append(data_file_path('beijing_f3_50_a.wav'))
    lines = self._CLI_output(argv)
    self.assertEqual(len(lines), 589)
    header = lines[0]
    self.assertEqual(header[-1:], ['shrF0'])
    self.assertEqual(len(lines[1]), 6)
    # Spot-check one complete row in the middle of the output.
    expected_row_100 = [
        'beijing_f3_50_a.wav',
        'C1',
        '0.766',
        '0.866',
        '865.000',
        '230.220',
    ]
    self.assertEqual(lines[100], expected_row_100)
def test_measurements_from_file(self):
    """Pass a measurements file via ``--default-measurements-file``.

    The file names snackF0 and shrF0.  The run is expected to give 589
    lines, a header ending in those two names, and 7 fields in line 1.
    """
    # NOTE(review): the whitespace inside this triple-quoted literal is
    # reproduced exactly as seen in the reviewed copy; confirm it matches
    # the intended file layout.
    measurefn = self._make_file(""" snackF0 shrF0 """)
    lines = self._CLI_output([
        "--default-measurements-file", measurefn,
        data_file_path('beijing_f3_50_a.wav'),
        ])
    self.assertEqual(len(lines), 589)
    self.assertEqual(lines[0][-2:], ['snackF0', 'shrF0'])
    self.assertEqual(len(lines[1]), 7)
def test_measurements_in_settings(self):
    """Name the measurement inside the settings file, not on the command line.

    The settings file carries ``measurements snackF0`` and
    ``include-empty-labels``; no ``--measurements`` option is passed.  The
    run is expected to give 2347 lines, a header containing 'snackF0', and
    6 fields in line 1.
    """
    # NOTE(review): the whitespace inside this triple-quoted literal is
    # reproduced exactly as seen in the reviewed copy; confirm it matches
    # the intended file layout.
    settingsfn = self._make_file(""" measurements snackF0 include-empty-labels """)
    lines = self._CLI_output([
        '--settings', settingsfn,
        data_file_path('beijing_f3_50_a.wav'),
        ])
    self.assertEqual(len(lines), 2347)
    self.assertIn('snackF0', lines[0])
    self.assertEqual(len(lines[1]), 6)
def test_resample_wav(self):
    """The resampled wav written by SoundFile matches the stored 16 kHz file.

    Each file in ``wav_fns`` is copied to its own temporary directory,
    opened with ``resample_freq=16000``, and the file at ``wavpath_rs`` is
    compared with the matching ``-resample-16kHz.wav`` reference.
    """
    for src_path in wav_fns:
        base = os.path.basename(src_path)
        work_path = os.path.join(self.tmpdir(), base)
        shutil.copy(src_path, work_path)
        sound = SoundFile(work_path, resample_freq=16000)
        data, _data_int, fs = wavread(sound.wavpath_rs)
        stem = os.path.splitext(base)[0]
        ref_rel = os.path.join('soundfile', 'resample',
                               stem + '-resample-16kHz.wav')
        ref_data, _ref_int, ref_fs = wavread(data_file_path(ref_rel))
        self.assertEqual(fs, ref_fs)
        self.assertEqual(len(data), len(ref_data))
        self.assertAllClose(data, ref_data, rtol=1e-05, atol=1e-08)
def test_raw_resample_data(self):
    """In-memory resampled data matches the stored raw 16 kHz text dump.

    Checks both the float data (``wavdata_rs``) and the int16 data
    (``wavdata_rs_int``) for every file in ``wav_fns``.
    """
    for src_path in wav_fns:
        base = os.path.basename(src_path)
        work_path = os.path.join(self.tmpdir(), base)
        shutil.copy(src_path, work_path)
        sound = SoundFile(work_path, resample_freq=16000)
        stem = os.path.splitext(base)[0]
        ref_rel = os.path.join('soundfile', 'resample',
                               stem + '-raw-resample-16kHz.txt')
        ref = np.loadtxt(data_file_path(ref_rel))
        self.assertEqual(len(sound.wavdata_rs), len(ref))
        self.assertAllClose(sound.wavdata_rs, ref, rtol=1e-05, atol=1e-08)
        self.assertTrue(sound.wavdata_rs_int.dtype == 'int16')
        # Scale the float reference by 32768 and cast to int16 to get the
        # expected integer samples.
        ref_int = np.int16(ref * 32768)
        self.assertAllClose(ref_int, sound.wavdata_rs_int,
                            rtol=1e-05, atol=1e-08)
def test_no_textgrid(self):
    """A wav with no TextGrid loads, but asking for intervals raises.

    The wav is copied alone into a fresh temporary directory, so there is
    no TextGrid beside it.  ``textgrid`` must then be ``None`` and reading
    ``textgrid_intervals`` must raise ``ValueError`` with a message that
    mentions the wav's extension-less name, the directory and 'TextGrid'.
    """
    fn = 'beijing_f3_50_a.wav'
    t = self.tmpdir()
    spath = os.path.join(t, fn)
    shutil.copy(data_file_path(fn), spath)
    s = SoundFile(spath)
    self.assertIsNone(s.textgrid)
    with self.assertRaises(ValueError) as cx:
        s.textgrid_intervals
    msg = str(cx.exception)
    # Check for the whole extension-less stem.  Indexing the basename with
    # [0] would only test for its first character ('b'), which nearly any
    # message satisfies.
    stem = os.path.splitext(os.path.basename(fn))[0]
    self.assertIn(stem, msg)
    self.assertIn(t, msg)
    self.assertIn('TextGrid', msg)
def example_as_textgrid_input(self, basename, expected):
    """Load ``basename``.TextGrid for the sample wav and compare intervals.

    The sample wav and the chosen TextGrid (renamed to match the wav) are
    placed together in a temporary directory.  Each interval returned by
    ``SoundFile.textgrid_intervals`` is compared with the same-index entry
    of ``expected``: element 0 exactly, elements 1 and 2 approximately.
    """
    wav_fn = 'beijing_f3_50_a.wav'
    work_dir = self.tmpdir()
    wav_copy = os.path.join(work_dir, wav_fn)
    shutil.copy(sound_file_path(wav_fn), wav_copy)
    grid_rel = os.path.join('soundfile', 'textgrid', basename + '.TextGrid')
    grid_copy = os.path.join(work_dir, 'beijing_f3_50_a.TextGrid')
    shutil.copy(data_file_path(grid_rel), grid_copy)
    sound = SoundFile(wav_copy)
    actual = sound.textgrid_intervals
    for i, got in enumerate(actual):
        want = expected[i]
        self.assertEqual(got[0], want[0], 'row %s' % i)
        self.assertAlmostEqual(got[1], want[1], msg='elt %s,1' % i)
        self.assertAlmostEqual(got[2], want[2], msg='elt %s,2' % i)
def test_remove_empty_lines_from_file(self):
    """``remove_empty_lines_from_file`` drops blank lines and nothing else."""
    fn = 'extra_newlines.txt'
    source_path = data_file_path(os.path.join('helpers', fn))
    # Work on a copy so the fixture itself is never modified.
    copy_path = os.path.join(self.tmpdir(), fn)
    shutil.copy(source_path, copy_path)
    remove_empty_lines_from_file(copy_path)
    # Lines of the untouched fixture.
    with open(data_file_path(os.path.join('helpers', fn))) as f:
        lines_orig = f.readlines()
    # Lines of the processed copy.
    with open(copy_path) as f:
        lines_rm = f.readlines()
    # The fixture must have had something removed.
    self.assertTrue(len(lines_orig) > len(lines_rm))
    # Every non-blank fixture line must appear, in order, in the copy.
    non_blank = [line for line in lines_orig if line.rstrip()]
    for idx, line_orig in enumerate(non_blank):
        self.assertEqual(line_orig, lines_rm[idx])
    # The copy must not contain anything beyond those lines.
    self.assertEqual(len(non_blank), len(lines_rm))
def test_textgrid_intervals(self):
    """``textgrid_intervals`` returns the six labelled spans of the sample.

    Each interval is a (label, start, end) triple; labels are compared
    exactly and the times approximately (unittest's default 7 places).
    """
    s = SoundFile(data_file_path('beijing_f3_50_a.wav'))
    expected = (
        ('', 0, 0.7660623496874233),
        ('C1', 0.7660623496874233, 0.865632223379142),
        ('V1', 0.865632223379142, 1.0740775664347026),
        ('C2', 1.0740775664347026, 1.1922586314706678),
        ('V2', 1.1922586314706678, 1.350453757896763),
        ('', 1.350453757896763, 2.34),
        )
    actual = s.textgrid_intervals
    # Without this, an empty or truncated result would pass the loop below
    # without comparing anything.
    self.assertEqual(len(actual), len(expected))
    for i, (got, want) in enumerate(zip(actual, expected)):
        self.assertEqual(got[0], want[0], 'row %s' % i)
        # The message must be passed by keyword: the third positional
        # parameter of assertAlmostEqual is `places`, not `msg`.
        self.assertAlmostEqual(got[1], want[1], msg='elt %s,1' % i)
        self.assertAlmostEqual(got[2], want[2], msg='elt %s,2' % i)
def test_no_textgrid(self):
    """``--no-textgrid`` output carries no TextGrid labels at all."""
    argv = [
        '--measurements', 'snackF0',
        '--no-textgrid',
        data_file_path('beijing_f3_50_a.wav'),
    ]
    lines = self._CLI_output(argv)
    # The textgrid output has a repeated frame offset at the end and
    # beginning of each block.  This sample has six blocks (including the
    # ones with blank labels), so the --include-empty-labels case has five
    # more records than this one, where no frames are repeated.
    self.assertEqual(len(lines), 2342)
    for label in ('C1', 'V1', 'C2', 'V2'):
        labelled = [x for x in lines if label in x]
        self.assertEqual(len(labelled), 0)
def test_default_setup(self):
    """Run ``opensauce/process.py`` on a copy of ``defaults`` end to end.

    The ``defaults`` tree is copied into a temporary directory, the script
    is run as a subprocess against that copy, and the test checks for a
    zero exit status and that the ``cant_c5_19a.f0`` file under the copied
    ``defaults/sounds`` matches the stored reference file.
    """
    tmp = self.tmpdir()

    def d(fn):
        # Resolve a path relative to this test's temporary directory.
        return os.path.join(tmp, fn)

    os.mkdir(d('output'))
    copytree('defaults', d('defaults'))
    p = Popen(['python', 'opensauce/process.py',
               '-i', d('defaults/sounds'),
               '-o', d('output'),
               '-s', d('defaults/settings/default.csv'),
               '-p', d('defaults/parameters/default.csv'),
               ],
              stdout=PIPE,
              )
    # For now, just ignore the output.  communicate() drains the pipe,
    # waits for the process to exit and closes the pipe, so no file
    # handle is left open after the test.
    p.communicate()
    self.assertEqual(p.returncode, 0)
    self.assertTrue(filecmp.cmp(d('defaults/sounds/cant_c5_19a.f0'),
                                data_file_path('cant_c5_19a.f0')))
def test_wavread_formats(self):
    """``wavread`` accepts 16-bit PCM and rejects the other sample formats."""
    fmt_dir = os.path.join('helpers', 'wav-formats')

    # 16-bit PCM file should be read correctly
    fn = data_file_path(os.path.join(fmt_dir, 'pcm-16bit.wav'))
    samples, samples_int, Fs = wavread(fn)

    # Other WAV file formats should raise IOError
    pcm16_only = 'Input WAV file must be in 16-bit integer PCM format'
    unsupported = (
        'pcm-8bit.wav',
        'pcm-32bit.wav',
        'float-32bit.wav',
        'float-64bit.wav',
    )
    for wav_name in unsupported:
        with self.assertRaisesRegex(IOError, pcm16_only):
            fn = data_file_path(os.path.join(fmt_dir, wav_name))
            samples, samples_int, Fs = wavread(fn)

    # SciPy does not support 24-bit PCM format
    with self.assertRaisesRegex(
            ValueError,
            'Unsupported bit depth: the wav file has 24-bit data.'):
        fn = data_file_path(os.path.join(fmt_dir, 'pcm-24bit.wav'))
        samples, samples_int, Fs = wavread(fn)
def test_wavread_formats(self):
    """Only 16-bit PCM wav files are readable; other formats must raise."""

    def read_format(wav_name):
        # Resolve a fixture under helpers/wav-formats and read it.
        rel_path = os.path.join('helpers', 'wav-formats', wav_name)
        return wavread(data_file_path(rel_path))

    # 16-bit PCM file should be read correctly
    samples, samples_int, Fs = read_format('pcm-16bit.wav')

    not_pcm16 = 'Input WAV file must be in 16-bit integer PCM format'
    rejected = [
        # Other WAV file formats should raise IOError
        (IOError, not_pcm16, 'pcm-8bit.wav'),
        (IOError, not_pcm16, 'pcm-32bit.wav'),
        (IOError, not_pcm16, 'float-32bit.wav'),
        (IOError, not_pcm16, 'float-64bit.wav'),
        # SciPy does not support 24-bit PCM format
        (ValueError,
         'Unsupported bit depth: the wav file has 24-bit data.',
         'pcm-24bit.wav'),
    ]
    for exc_type, pattern, wav_name in rejected:
        with self.assertRaisesRegex(exc_type, pattern):
            samples, samples_int, Fs = read_format(wav_name)
def test_find_textgrid_using_defaults(self):
    """With no TextGrid arguments, SoundFile still ends up with a textgrid."""
    wav_path = data_file_path('beijing_f3_50_a.wav')
    sound = SoundFile(wav_path)
    self.assertNotEqual(sound.textgrid, None)
    # Only checking that reading the intervals does not raise.
    sound.textgrid_intervals