Exemplo n.º 1
0
    def call(self, audio_data, sample_rate=None):
        """Compute the power spectrum and phase spectrum of audio data.

        Args:
            audio_data: the audio signal from which to compute the spectra.
                The original documentation asks for a (1, N) tensor.
            sample_rate: optional sample rate of the signal. When omitted,
                the configured ``self.config.sample_rate`` is used. A
                supplied value is checked against the configured rate by an
                ``assert_equal`` op that the analysis depends on.

        Returns:
            A ``(power_spectrum, phase_spectrum)`` pair as returned by
            ``py_x_ops.analyfiltbank``. Per the original documentation, each
            is a float tensor of size (num_frames, num_frequencies) holding
            the respective spectrum of every frame.
        """
        p = self.config
        with tf.name_scope('analyfiltbank'):
            # Identity check: a tensor/array argument must never be routed
            # through an overloaded `==` just to detect "not provided".
            if sample_rate is None:
                sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

            # The op is configured for one sample rate only; reject others.
            assert_op = tf.assert_equal(
                tf.constant(p.sample_rate),
                tf.cast(sample_rate, dtype=tf.int32))
            with tf.control_dependencies([assert_op]):
                # The analysis op is given the rate as a float tensor.
                sample_rate = tf.cast(sample_rate, dtype=float)
                power_spectrum, phase_spectrum = py_x_ops.analyfiltbank(
                    audio_data,
                    sample_rate,
                    window_length=p.window_length,
                    frame_length=p.frame_length)

                return power_spectrum, phase_spectrum
Exemplo n.º 2
0
    def test_sfb(self):
        ''' test sfb op: analysis followed by synthesis recovers the signal'''
        with self.cached_session(use_gpu=False, force_gpu=False):
            sample_rate, input_data = feat_lib.load_wav(self.wavpath, sr=16000)

            power_spc, phase_spc = py_x_ops.analyfiltbank(
                input_data, sample_rate)

            # Evaluate each spectrum once and reuse the arrays; every extra
            # .eval() would re-run the analysis op.
            power_val = power_spc.eval()
            phase_val = phase_spc.eval()

            logging.info('Shape of power_spc: {}'.format(power_val.shape))
            logging.info('Shape of phase_spc: {}'.format(phase_val.shape))

            output = py_x_ops.synthfiltbank(power_val, phase_val, sample_rate)

            # Likewise, run the synthesis op a single time.
            recovered = output.eval()

            self.assertEqual(recovered.ndim, 1)
            logging.info('Shape of recovered signal: {}'.format(
                recovered.shape))

            # beginning 400 samples are different, due to the overlap and add
            self.assertAllClose(recovered.flatten()[500:550],
                                input_data[500:550],
                                rtol=1e-4,
                                atol=1e-4)
Exemplo n.º 3
0
    def test_afb(self):
        ''' test afb op against reference values (rank-1 output)'''
        # NOTE(review): another method named `test_afb` follows this one in
        # this source. If both live in the same class, the later definition
        # replaces this one and this test never runs -- confirm and rename
        # or remove whichever is obsolete.
        with self.session():
            sample_rate, input_data = feat_lib.load_wav(self.wavpath, sr=16000)

            power_spc, phase_spc = py_x_ops.analyfiltbank(
                input_data, sample_rate)

            # Reference values for the first 50 flattened outputs.
            power_spc_true = np.array([
                0.000421823002, 0.000014681223, 0.000088715387, 0.000011405386,
                0.000029108920, 0.000016433882, 0.000009128947, 0.000016150383,
                0.000068095047, 0.000016092306, 0.000088840192, 0.000021255839,
                0.000033152886, 0.000005644561, 0.000012678992, 0.000009685464,
                0.000022561202, 0.000004176219, 0.000032476772, 0.000063007421,
                0.000001721088, 0.000003773108, 0.000012991571, 0.000006143227,
                0.000005361593, 0.000019796202, 0.000012828057, 0.000040009807,
                0.000009260243, 0.000060815764, 0.000036184814, 0.000018079394,
                0.000004533325, 0.000008295409, 0.000033129665, 0.000022150667,
                0.000020058087, 0.000000962711, 0.000017114238, 0.000007549510,
                0.000023227087, 0.000037615722, 0.000007189777, 0.000006701076,
                0.000016871410, 0.000018671506, 0.000006927207, 0.000004177695,
                0.000005777914, 0.000002745287
            ])

            phase_spc_true = np.array([
                3.141592741013, 0.017522372305, 2.614648103714, 1.024240016937,
                -0.082203239202, 0.177630946040, -0.947744905949,
                1.557014584541, -2.254315614700, -0.327101945877,
                -2.747241020203, -1.865882754326, -2.847117424011,
                -0.581349492073, -3.014511823654, 2.957268953323,
                1.846585988998, -1.926323652267, -2.718185901642,
                -2.704042911530, -0.473446547985, -2.938575029373,
                2.915200233459, -1.540565252304, -3.052149772644,
                2.665060997009, -2.724275827408, -2.989539623260,
                -2.875509977341, -2.549245357513, 2.585565090179,
                1.503721714020, 1.570051312447, 1.980712175369, 2.068141937256,
                -1.657162785530, 2.774835824966, -1.669888973236,
                -2.816159725189, 3.112393617630, -0.539753019810,
                2.466773271561, 2.961024999619, -1.002810001373,
                2.275165081024, -2.257984638214, -2.611628055573,
                -2.753412723541, -2.071642875671, -2.972373962402
            ])

            # Evaluate each output once; the rank check reuses the array
            # instead of triggering a separate session run.
            power_val = power_spc.eval()
            phase_val = phase_spc.eval()

            self.assertEqual(power_val.ndim, 1)
            self.assertEqual(phase_val.ndim, 1)
            self.assertAllClose(power_val.flatten()[:50], power_spc_true)
            self.assertAllClose(phase_val.flatten()[:50], phase_spc_true)
    def test_afb(self):
        ''' test afb op against reference values (rank-2 output)'''
        # NOTE(review): an earlier method named `test_afb` precedes this one
        # in this source. If both live in the same class, this definition
        # shadows the earlier one -- confirm and rename or remove whichever
        # is obsolete.
        with self.session():
            sample_rate, input_data = feat_lib.load_wav(self.wavpath, sr=16000)

            power_spc, phase_spc = py_x_ops.analyfiltbank(
                input_data, sample_rate)

            # Reference values for the top-left 10x10 corner of the
            # transposed power output.
            power_spc_true = np.array(
                [[
                    4.2182300e-04, 3.6964193e-04, 3.9906241e-05, 2.8196722e-05,
                    3.3976138e-04, 3.7671626e-04, 2.2727624e-04, 7.2495081e-05,
                    4.3451786e-05, 3.4654513e-06
                ],
                 [
                     1.4681223e-05, 2.8831255e-05, 3.5616580e-05,
                     3.9359711e-05, 1.2714787e-04, 1.2794189e-04,
                     3.6509471e-05, 1.7578101e-05, 5.9672035e-05, 2.9785692e-06
                 ],
                 [
                     8.8715387e-05, 6.0998322e-05, 2.7695101e-05,
                     1.6866413e-04, 4.6845453e-05, 3.3532990e-05,
                     5.7005627e-06, 5.1852752e-05, 1.8390550e-05, 8.3459439e-05
                 ],
                 [
                     1.1405386e-05, 1.8942148e-06, 1.6338145e-06,
                     1.8362705e-05, 8.4106450e-06, 4.4174294e-06,
                     3.6533682e-05, 5.0541588e-05, 1.6701326e-06, 1.8736981e-05
                 ],
                 [
                     2.9108920e-05, 1.6862698e-05, 3.3437627e-05,
                     6.9332527e-05, 5.0028186e-05, 5.9426224e-05,
                     2.1895030e-06, 2.3780794e-06, 4.7786685e-05, 7.3811811e-05
                 ],
                 [
                     1.6433882e-05, 9.5777386e-07, 2.0980822e-06,
                     4.8990279e-07, 1.4232077e-05, 1.5986938e-05,
                     2.9042780e-05, 1.1719906e-05, 2.4548817e-06, 5.3594176e-06
                 ],
                 [
                     9.1289467e-06, 9.4249899e-06, 7.4781286e-07,
                     1.8923520e-05, 6.5740237e-06, 4.3209452e-06,
                     3.9396346e-06, 1.2287317e-05, 4.6807354e-06, 5.8512210e-06
                 ],
                 [
                     1.6150383e-05, 2.6649790e-05, 1.8610657e-05,
                     2.2872716e-06, 1.4209920e-05, 2.3279742e-06,
                     6.6038615e-06, 2.6169775e-05, 2.8335158e-05, 1.7595910e-06
                 ],
                 [
                     6.8095047e-05, 9.1859045e-05, 2.6713702e-05,
                     3.0580850e-05, 1.4539381e-05, 4.2510033e-05,
                     2.2579852e-05, 1.4843822e-05, 2.0883192e-05, 6.0624756e-05
                 ],
                 [
                     1.6092306e-05, 1.4245335e-05, 2.4250150e-05,
                     6.0177539e-05, 6.7926321e-06, 3.4922948e-07,
                     2.1843030e-06, 8.5554876e-07, 2.6831965e-06, 2.0012436e-05
                 ]])

            # Reference values for the top-left 10x10 corner of the
            # transposed phase output.
            phase_spc_true = np.array(
                [[
                    3.1415927, 3.1415927, 3.1415927, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 3.1415927
                ],
                 [
                     0.01752237, 1.6688037, 1.4971976, 1.4470094, 2.0516894,
                     -2.3112175, -0.7115377, 2.9614341, -1.2494497, -0.7055688
                 ],
                 [
                     2.614648, 0.63351387, -2.0660093, 1.7626916, -1.1257634,
                     3.017448, -2.892095, -1.2209401, 1.7407895, -1.0281658
                 ],
                 [
                     1.02424, -1.8967879, -0.6139833, 2.587602, 3.0070715,
                     1.5781559, -1.899145, -1.1459525, -0.24284656, -0.8106653
                 ],
                 [
                     -0.08220324, 0.5497215, 1.7031444, -2.8960562, -1.3680246,
                     0.4349923, 2.0676146, 1.2389332, 2.6312854, -1.7511902
                 ],
                 [
                     0.17763095, 2.7475302, -0.20671827, 1.0719725, -2.388657,
                     1.189566, -1.0643665, 2.5955305, -0.69036585, -0.5287417
                 ],
                 [
                     -0.9477449, -2.7059674, 0.53469753, 1.9289348, 0.24833842,
                     0.03517391, -1.4778724, -0.16577117, -1.7509687,
                     -0.46875867
                 ],
                 [
                     1.5570146, -2.9596932, -0.7975963, 3.0060582, -1.038453,
                     0.14911443, -1.5873562, 0.7229206, 2.679422, -1.1890441
                 ],
                 [
                     -2.2543156, 0.47845784, -2.8412538, -0.5494534, 1.6583048,
                     -1.4567885, 1.0724461, -2.70243, -0.2690962, 1.8831034
                 ],
                 [
                     -0.32710192, 0.01503609, 0.29720783, -0.7409194,
                     -2.183623, 2.3637679, 0.6405145, 1.4975713, 0.18241015,
                     2.2659144
                 ]])

            # Evaluate each output once; the rank check reuses the array
            # instead of triggering a separate session run.
            power_val = power_spc.eval()
            phase_val = phase_spc.eval()

            self.assertEqual(power_val.ndim, 2)
            self.assertEqual(phase_val.ndim, 2)
            logging.info('power_spc shape: {}'.format(power_spc.shape))
            logging.info('phase_spc shape: {}'.format(phase_spc.shape))
            self.assertAllClose(power_val.transpose()[:10, :10],
                                power_spc_true)
            self.assertAllClose(phase_val.transpose()[:10, :10],
                                phase_spc_true)