Пример #1
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally updated from ``config``.

        :param config: optional settings forwarded to ``HParams.parse``; four
            entries are registered with the defaults below:
            --window_length     : Window length in seconds. (float, default = 0.025)
            --frame_length      : Hop length in seconds. (float, default = 0.010)
            --snip_edges        : Edge-handling flag. Per the original notes, when
                                  enabled the last frame (shorter than
                                  window_length) is cut off; otherwise
                                  1 // 2 frame_length data is padded to data.
                                  (int, default = 1)
            --remove_dc_offset  : Subtract mean from waveform on each frame.
                                  (bool, default = True)
        :return: An object of class HParams, which is a set of hyperparameters
            as name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ("window_length", 0.025),
            ("frame_length", 0.010),
            ("snip_edges", 1),
            ("remove_dc_offset", True),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        return hparams
Пример #2
0
    def params(cls, config=None):
        """Build the CMVN hyperparameters.

        Args:
            config: optional settings forwarded to ``HParams.parse``. The
                following four entries are registered with defaults:

            'type': Type of operation. (string, default = 'CMVN')
            'global_mean': Global mean of features. (list of float, default = [0.0])
            'global_variance': Global variance of features.
                               (list of float, default = [1.0])
            'local_cmvn': If true, local cmvn will be done on features.
                          (bool, default = False)

        Note:
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs. An assertion checks that ``global_mean`` and
            ``global_variance`` have the same length. Before returning,
            ``global_variance`` is replaced by its element-wise square root plus
            1e-6, converted to a list.
        """
        hparams = HParams(cls=cls)
        for name, value in (
            ("type", "CMVN"),
            ("global_mean", [0.0]),
            ("global_variance", [1.0]),
            ("local_cmvn", False),
        ):
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        assert len(hparams.global_mean) == len(hparams.global_variance), (
            "Error, global_mean length {} is not equals to global_variance length {}"
            .format(len(hparams.global_mean), len(hparams.global_variance))
        )

        # Store sqrt(variance) with a small offset back under the same name.
        root = np.sqrt(hparams.global_variance)
        hparams.global_variance = (root + 1e-6).tolist()
        return hparams
Пример #3
0
    def params(cls, config=None):
        """Build the default hyperparameters, optionally updated from ``config``.

        Args:
            config: optional settings forwarded to ``HParams.parse``. The
                following four entries are registered with defaults:

            'window_length': Window length in seconds. (float, default = 0.025)
            'frame_length': Hop length in seconds. (float, default = 0.010)
            'snip_edges': If 1, the last frame (shorter than window_length) will be
                          cutoff. If 2, 1 // 2 frame_length data will be padded
                          to data. (int, default = 1)
            'remove_dc_offset': Subtract mean from waveform on each frame.
                                (bool, default = True)

        Note:
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        hparams = HParams(cls=cls)
        hparams.add_hparam("window_length", 0.025)
        hparams.add_hparam("frame_length", 0.010)
        hparams.add_hparam("snip_edges", 1)
        hparams.add_hparam("remove_dc_offset", True)

        # Nothing to parse: hand back the defaults as they are.
        if config is None:
            return hparams

        hparams.parse(config, True)
        return hparams
Пример #4
0
    def params(cls, config=None):
        """
        Build the Spectrum hyperparameters.

        :param config: optional settings forwarded to ``HParams.parse``; ten
            entries are registered by this method with the defaults below:
              --window_length     : Window length in seconds. (float, default = 0.025)
              --frame_length      : Hop length in seconds. (float, default = 0.010)
              --output_type       : If 1, return power spectrum. If 2, return
                                    log-power spectrum. (int, default = 2)
              --snip_edges        : If 1, the last frame (shorter than window_length)
                                    will be cutoff. If 2, 1 // 2 frame_length data
                                    will be padded to data. (int, default = 1)
              --raw_energy        : If 1, compute frame energy before preemphasis
                                    and windowing. If 2, compute frame energy after
                                    preemphasis and windowing. (int, default = 1)
              --preEph_coeff      : Coefficient for use in frame-signal preemphasis.
                                    (float, default = 0.97)
              --window_type       : Type of window
                                    ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                    (string, default = "povey")
              --remove_dc_offset  : Subtract mean from waveform on each frame.
                                    (bool, default = True)
              --is_fbank          : If true, compute power spectrum without frame
                                    energy. If false, using the frame energy instead
                                    of the square of the constant component of the
                                    signal. (bool, default = False)
              --dither            : Dithering constant (0.0 means no dither).
                                    (float, default = 0.0)
            The result of ``CMVN.params()`` is appended as well.
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs. Its ``type`` is set to "Spectrum" after parsing.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ("window_length", 0.025),
            ("frame_length", 0.010),
            ("output_type", 2),
            ("snip_edges", 1),
            ("raw_energy", 1),
            ("preEph_coeff", 0.97),
            ("window_type", "povey"),
            ("remove_dc_offset", True),
            ("is_fbank", False),
            ("dither", 0.0),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        # cmvn
        hparams.append(CMVN.params())

        if config is not None:
            hparams.parse(config, True)

        # Assigned last, so it replaces whatever ``type`` held before.
        hparams.type = "Spectrum"
        return hparams
Пример #5
0
    def params(cls, config=None):
        """
        Build the ReadWav hyperparameters.

        :param config: optional settings forwarded to ``HParams.parse``; two
            entries are registered with defaults: type (string,
            default='ReadWav') and audio_channels (int, default=1).
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        hparams = HParams(cls=cls)
        for name, value in (('type', 'ReadWav'), ('audio_channels', 1)):
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        return hparams
Пример #6
0
    def params(cls, config=None):
        """Build the CMVN hyperparameters.

        Registers ``type`` ("CMVN"), ``global_mean`` ([0.0]), ``global_variance``
        ([1.0]) and ``local_cmvn`` (False), then forwards ``config`` to
        ``HParams.parse`` when it is not None. An assertion checks that
        ``global_mean`` and ``global_variance`` have the same length, after which
        ``global_variance`` is replaced by its element-wise square root plus
        1e-6, converted to a list. Returns the HParams object.
        """
        defaults = (
            ("type", "CMVN"),
            ("global_mean", [0.0]),
            ("global_variance", [1.0]),
            ("local_cmvn", False),
        )

        hparams = HParams(cls=cls)
        for key, default in defaults:
            hparams.add_hparam(key, default)

        if config is not None:
            hparams.parse(config, True)

        assert len(hparams.global_mean) == len(hparams.global_variance), (
            "Error, global_mean length {} is not equals to global_variance length {}"
            .format(len(hparams.global_mean), len(hparams.global_variance)))

        # Overwrite the variance entry with sqrt(variance) + 1e-6.
        shifted_root = np.sqrt(hparams.global_variance) + 1e-6
        hparams.global_variance = shifted_root.tolist()
        return hparams
Пример #7
0
    def params(cls, config=None):
        """
        Build the Fbank hyperparameters.

        The result of ``Spectrum.params`` is appended first, requested with
        ``output_type`` 1 and ``is_fbank`` True.

        :param config: optional settings forwarded to ``HParams.parse``. Entries
            registered by this method itself:
            --upper_frequency_limit     : (int, default = 0)
            --lower_frequency_limit     : (int, default = 60)
            --filterbank_channel_count  : (int, default = 40)
            --delta_delta               : (bool, default = False)
            --order                     : (int, default = 2)
            --window                    : (int, default = 2)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs. After parsing, ``type`` is set to "Fbank" and a
            ``channel`` entry is added: ``order + 1`` when ``delta_delta`` is
            truthy, otherwise 1.
        """
        hparams = HParams(cls=cls)

        # spectrum
        spectrum_overrides = {"output_type": 1, "is_fbank": True}
        hparams.append(Spectrum.params(spectrum_overrides))

        own_defaults = (
            # fbank
            ("upper_frequency_limit", 0),
            ("lower_frequency_limit", 60),
            ("filterbank_channel_count", 40),
            # delta
            ("delta_delta", False),
            ("order", 2),
            ("window", 2),
        )
        for name, value in own_defaults:
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        hparams.type = "Fbank"

        # channel is derived from the values in effect after parsing.
        hparams.add_hparam("channel", 1)
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams
Пример #8
0
    def params(cls, config=None):
        """Build the ReadWav hyperparameters.

        Args:
           config: optional settings forwarded to ``HParams.parse``. The
               following two entries are registered with defaults:

           'type': 'ReadWav'.
           'audio_channels': index of the desired channel. (default=1)

        Note:
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        hparams = HParams(cls=cls)
        hparams.add_hparam('type', 'ReadWav')
        hparams.add_hparam('audio_channels', 1)

        # Nothing to parse: hand back the defaults as they are.
        if config is None:
            return hparams

        hparams.parse(config, True)
        return hparams
Пример #9
0
    def params(cls, config=None):
        """Build the pitch-extraction hyperparameters.

        Args:
            config: optional settings forwarded to ``HParams.parse``. The
                following twenty entries are registered with defaults (keys use
                underscores, as registered by the code):

            'window_length': Frame length in seconds (float, default = 0.025)
            'frame_length': Frame shift in seconds (float, default = 0.010)
            'sample_rate': (int, default = 16000)
            'snip_edges': If this is set to false, the incomplete frames near the
                          ending edge won't be snipped, so that the number of
                          frames is the file size divided by the frame-shift.
                          This makes different types of features give the same
                          number of frames. (bool, default = True)
            'preemph_coeff': Coefficient for use in signal preemphasis (deprecated).
                             (float, default = 0.0)
            'min_f0': min. F0 to search for (Hz) (float, default = 50.0)
            'max_f0': max. F0 to search for (Hz) (float, default = 400.0)
            'soft_min_f0': Minimum f0, applied in soft way, must not exceed min_f0.
                           (float, default = 10.0)
            'penalty_factor': cost factor for F0 change. (float, default = 0.1)
            'lowpass_cutoff': cutoff frequency for LowPass filter (Hz).
                              (float, default = 1000.0)
            'resample_freq': Frequency that we down-sample the signal to. Must be
                             more than twice lowpass_cutoff (float, default = 4000.0)
            'delta_pitch': Smallest relative change in pitch that our algorithm
                           measures (float, default = 0.005)
            'nccf_ballast': Increasing this factor reduces NCCF for quiet frames.
                            (float, default = 7000.0)
            'lowpass_filter_width': Integer that determines filter width of lowpass
                                    filter, more gives sharper filter
                                    (int, default = 1)
            'upsample_filter_width': Integer that determines filter width when
                                     upsampling NCCF. (int, default = 5)
            'max_frames_latency': Maximum number of frames of latency that we allow
                                  pitch tracking to introduce into the feature
                                  processing (affects output only if
                                  frames_per_chunk > 0 and
                                  simulate_first_pass_online is true)
                                  (int, default = 0)
            'frames_per_chunk': Only relevant for offline pitch extraction (e.g.
                                compute-kaldi-pitch-feats), you can set it to a small
                                nonzero value, such as 10, for better feature
                                compatibility with online decoding (affects energy
                                normalization in the algorithm) (int, default = 0)
            'simulate_first_pass_online': If true, compute-kaldi-pitch-feats will
                                          output features that correspond to what an
                                          online decoder would see in the first pass
                                          of decoding -- not the final version of
                                          the features, which is the default.
                                          Relevant if frames_per_chunk > 0
                                          (bool, default = False)
            'recompute_frame': Only relevant for online pitch extraction, or for
                               compatibility with online pitch extraction. A
                               non-critical parameter; the frame at which we
                               recompute some of the forward pointers, after
                               revising our estimate of the signal energy.
                               Relevant if frames_per_chunk > 0.
                               (int, default = 500)
            'nccf_ballast_online': This is useful mainly for debug; it affects how
                                   the NCCF ballast is computed.
                                   (bool, default = False)

        Note:
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ("window_length", 0.025),
            ("frame_length", 0.010),
            ("sample_rate", 16000),
            ("snip_edges", True),
            ("preemph_coeff", 0.0),
            ("min_f0", 50.0),
            ("max_f0", 400.0),
            ("soft_min_f0", 10.0),
            ("penalty_factor", 0.1),
            ("lowpass_cutoff", 1000.0),
            ("resample_freq", 4000.0),
            ("delta_pitch", 0.005),
            ("nccf_ballast", 7000.0),
            ("lowpass_filter_width", 1),
            ("upsample_filter_width", 5),
            ("max_frames_latency", 0),
            ("frames_per_chunk", 0),
            ("simulate_first_pass_online", False),
            ("recompute_frame", 500),
            ("nccf_ballast_online", False),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        return hparams
Пример #10
0
    def params(cls, config=None):
        """Build the Spectrum hyperparameters.

        Args:
            config: optional settings forwarded to ``HParams.parse``. The
                following ten entries are registered by this method with
                defaults:

            'window_length': Window length in seconds. (float, default = 0.025),
            'frame_length': Hop length in seconds. (float, default = 0.010),
            'output_type': If 1, return power spectrum. If 2, return log-power
                           spectrum. If 3, return magnitude spectrum.
                           (int, default = 2)
            'snip_edges': If 1, the last frame (shorter than window_length) will be
                          cutoff. If 2, 1 // 2 frame_length data will be padded
                          to data. (int, default = 1),
            'raw_energy': (int, default = 1) -- presumably selects whether frame
                          energy is computed before or after preemphasis and
                          windowing; TODO confirm against the implementation.
            'preEph_coeff': Coefficient for use in frame-signal preemphasis.
                            (float, default = 0.97),
            'window_type': Type of window ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                           (string, default = "povey")
            'remove_dc_offset': Subtract mean from waveform on each frame.
                                (bool, default = True)
            'is_fbank': If true, compute power spectrum without frame energy.
                        If false, using the frame energy instead of the
                        square of the constant component of the signal.
                        (bool, default = False)
            'dither': Dithering constant (0.0 means no dither).
                      (float, default = 0.0)

            The result of ``CMVN.params()`` is appended as well.

        Note:
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs. Its ``type`` is set to "Spectrum" after parsing.
        """
        hparams = HParams(cls=cls)
        hparams.add_hparam("window_length", 0.025)
        hparams.add_hparam("frame_length", 0.010)
        hparams.add_hparam("output_type", 2)
        hparams.add_hparam("snip_edges", 1)
        hparams.add_hparam("raw_energy", 1)
        hparams.add_hparam("preEph_coeff", 0.97)
        hparams.add_hparam("window_type", "povey")
        hparams.add_hparam("remove_dc_offset", True)
        hparams.add_hparam("is_fbank", False)
        hparams.add_hparam("dither", 0.0)

        # cmvn
        cmvn_params = CMVN.params()
        hparams.append(cmvn_params)

        if config is not None:
            hparams.parse(config, True)

        # Assigned last, so it replaces whatever ``type`` held before.
        hparams.type = "Spectrum"
        return hparams
Пример #11
0
    def params(cls, config=None):
        """
        Build the combined filterbank + pitch hyperparameters.

        The result of ``CMVN.params()`` is appended first; the entries below are
        then registered with their defaults, and ``config`` (when not None) is
        forwarded to ``HParams.parse``. Keys use underscores, as registered by
        the code.

        :param config: optional settings for the following entries:
              --delta_delta               : (bool, default = False)
              --order                     : (int, default = 2)
              --window                    : (int, default = 2)
              --channel                   : (int, default = 1); set to ``order + 1``
                                            after parsing when ``delta_delta`` is
                                            truthy.
              --snip_edges                : (bool, default = True)
              --preemph_coeff             : Coefficient for use in signal preemphasis
                                            (deprecated) (float, default = 0.0)
              --min_f0                    : min. F0 to search for (Hz)
                                            (float, default = 50.0)
              --max_f0                    : max. F0 to search for (Hz)
                                            (float, default = 400.0)
              --dither                    : Dithering constant (0.0 means no dither).
                                            (float, default = 0.0)
              --soft_min_f0               : Minimum f0, applied in soft way, must not
                                            exceed min_f0 (float, default = 10.0)
              --penalty_factor            : cost factor for F0 change.
                                            (float, default = 0.1)
              --lowpass_cutoff            : cutoff frequency for LowPass filter (Hz).
                                            (float, default = 1000.0)
              --resample_freq             : Frequency that we down-sample the signal
                                            to. Must be more than twice
                                            lowpass_cutoff (float, default = 4000.0)
              --delta_pitch               : Smallest relative change in pitch that our
                                            algorithm measures.
                                            (float, default = 0.005)
              --nccf_ballast              : Increasing this factor reduces NCCF for
                                            quiet frames. (float, default = 7000.0)
              --lowpass_filter_width      : Integer that determines filter width of
                                            lowpass filter, more gives sharper filter
                                            (int, default = 1)
              --upsample_filter_width     : Integer that determines filter width when
                                            upsampling NCCF (int, default = 5)
              --max_frames_latency        : Maximum number of frames of latency that
                                            we allow pitch tracking to introduce into
                                            the feature processing (affects output
                                            only if frames_per_chunk > 0 and
                                            simulate_first_pass_online is true)
                                            (int, default = 0)
              --frames_per_chunk          : Only relevant for offline pitch
                                            extraction (e.g.
                                            compute-kaldi-pitch-feats), you can set
                                            it to a small nonzero value, such as 10,
                                            for better feature compatibility with
                                            online decoding (affects energy
                                            normalization in the algorithm)
                                            (int, default = 0)
              --simulate_first_pass_online : If true, compute-kaldi-pitch-feats will
                                            output features that correspond to what
                                            an online decoder would see in the first
                                            pass of decoding -- not the final version
                                            of the features, which is the default.
                                            Relevant if frames_per_chunk > 0
                                            (bool, default = False)
              --recompute_frame           : Only relevant for online pitch
                                            extraction, or for compatibility with
                                            online pitch extraction. A non-critical
                                            parameter; the frame at which we
                                            recompute some of the forward pointers,
                                            after revising our estimate of the signal
                                            energy. Relevant if frames_per_chunk > 0.
                                            (int, default = 500)
              --nccf_ballast_online       : This is useful mainly for debug; it
                                            affects how the NCCF ballast is computed.
                                            (bool, default = False)
              --upper_frequency_limit     : High cutoff frequency for mel bins.
                                            (if <= 0, offset from Nyquist)
                                            (int, default = 0)
              --lower_frequency_limit     : Low cutoff frequency for mel bins.
                                            (float, default = 20.0)
              --filterbank_channel_count  : Number of triangular mel-frequency bins.
                                            (float, default = 80.0)
              --window_length             : Window length in seconds.
                                            (float, default = 0.025)
              --frame_length              : Hop length in seconds.
                                            (float, default = 0.010)
              --output_type               : If 1, return power spectrum. If 2, return
                                            log-power spectrum. (int, default = 1)
              --raw_energy                : If 1, compute frame energy before
                                            preemphasis and windowing. If 2, compute
                                            frame energy after preemphasis and
                                            windowing. (int, default = 1)
              --preEph_coeff              : Coefficient for use in frame-signal
                                            preemphasis. (float, default = 0.97)
              --window_type               : Type of window
                                            ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                            (string, default = "povey")
              --remove_dc_offset          : Subtract mean from waveform on each
                                            frame. (bool, default = True)
              --is_fbank                  : If true, compute power spectrum without
                                            frame energy. If false, using the frame
                                            energy instead of the square of the
                                            constant component of the signal.
                                            (bool, default = True)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        hparams = HParams(cls=cls)
        hparams.append(CMVN.params())

        # (name, default) pairs, registered in the same order as before.
        defaults = (
            # delta
            ('delta_delta', False),
            ('order', 2),
            ('window', 2),
            ('channel', 1),
            # pitch (plus dither)
            ('snip_edges', True),
            ('preemph_coeff', 0.0),
            ('min_f0', 50.0),
            ('max_f0', 400.0),
            ('dither', 0.0),
            ('soft_min_f0', 10.0),
            ('penalty_factor', 0.1),
            ('lowpass_cutoff', 1000.0),
            ('resample_freq', 4000.0),
            ('delta_pitch', 0.005),
            ('nccf_ballast', 7000.0),
            ('lowpass_filter_width', 1),
            ('upsample_filter_width', 5),
            ('max_frames_latency', 0),
            ('frames_per_chunk', 0),
            ('simulate_first_pass_online', False),
            ('recompute_frame', 500),
            ('nccf_ballast_online', False),
            # filterbank / spectrum
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 80.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('raw_energy', 1),
            ('preEph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),
        )
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        # Derive channel only after config has been parsed. Doing it before
        # (right after registering delta_delta=False) made this branch
        # unreachable, so a delta_delta enabled via config never updated
        # channel.
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams
Пример #12
0
    def params(cls, config=None):
        """
        Build the MelSpectrum hyperparameters.

        The result of ``Spectrum.params`` is appended first, requested with these
        overrides: output_type = 3, is_fbank = True, preEph_coeff = 0.0,
        window_type = 'hann', dither = 0.0, remove_dc_offset = False. Any other
        spectrum entries (e.g. window_length, frame_length, snip_edges,
        raw_energy) presumably keep the defaults chosen by ``Spectrum.params``
        -- verify there.

        :param config: optional settings forwarded to ``HParams.parse``. Entries
            registered by this method itself:
                --upper_frequency_limit     : High cutoff frequency for mel bins
                                              (if <= 0, offset from Nyquist)
                                              (int, default = 0)
                --lower_frequency_limit     : Low cutoff frequency for mel bins
                                              (int, default = 60)
                --filterbank_channel_count  : Number of triangular mel-frequency
                                              bins. (int, default = 40)
                --sample_rate               : (int, default = -1)
                --delta_delta               : (bool, default = False)
                --order                     : (int, default = 2)
                --window                    : (int, default = 2)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs. After parsing, ``type`` is set to 'MelSpectrum' and
            a ``channel`` entry is added: ``order + 1`` when ``delta_delta`` is
            truthy, otherwise 1.
        """
        hparams = HParams(cls=cls)

        # spectrum
        spectrum_overrides = {
            'output_type': 3,
            'is_fbank': True,
            'preEph_coeff': 0.0,
            'window_type': 'hann',
            'dither': 0.0,
            'remove_dc_offset': False
        }
        hparams.append(Spectrum.params(spectrum_overrides))

        own_defaults = (
            # mel_spectrum
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 60),
            ('filterbank_channel_count', 40),
            ('sample_rate', -1),
            # delta
            ('delta_delta', False),
            ('order', 2),
            ('window', 2),
        )
        for name, value in own_defaults:
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        hparams.type = 'MelSpectrum'

        # channel is derived from the values in effect after parsing.
        hparams.add_hparam('channel', 1)
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams
Пример #13
0
    def params(cls, config=None):
        """Build the default hyperparameters and apply optional overrides.

        Args:
            config: optional overrides; when not None it is handed to
                ``hparams.parse(config, True)`` after every default below has
                been registered. The registered hyperparameters are:

            'upper_frequency_limit': High cutoff frequency for mel bins (if <= 0, offset
                                      from Nyquist). (float, default = 0.0)
            'lower_frequency_limit': Low cutoff frequency for mel bins. (float, default = 20.0)
            'filterbank_channel_count': Number of triangular mel-frequency bins.
                                        (float, default = 23.0)
            'window_length': Window length in seconds. (float, default = 0.025)
            'frame_length': Hop length in seconds. (float, default = 0.010)
            'output_type': If 1, return power spectrum. If 2, return log-power
                            spectrum. (int, default = 1)
            'snip_edges': If 1, the last frame (shorter than window_length) will be
                          cutoff. If 2, 1 // 2 frame_length data will be padded
                          to data. (int, default = 1)
            'raw_energy': (int, default = 1) -- meaning is not described in this
                          method; TODO confirm against the op that consumes it.
            'preEph_coeff': Coefficient for use in frame-signal preemphasis.
                            (float, default = 0.97)
            'window_type': Type of window ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                            (string, default = "povey")
            'remove_dc_offset': Subtract mean from waveform on each frame.
                                (bool, default = True)
            'is_fbank': If true, compute power spectrum without frame energy.
                          If false, using the frame energy instead of the
                          square of the constant component of the signal.
                          (bool, default = True)
            'cepstral_lifter': Constant that controls scaling of MFCCs. (float, default = 22.0)
            'coefficient_count': Number of cepstra in MFCC computation. (int, default = 13)
            'use_energy': Use energy (not C0) in MFCC computation. (bool, default = True)
            'dither': Dithering constant (0.0 means no dither).
                      (float, default = 0.0) [add robust to training]
            'delta_delta', 'order', 'window': (defaults False, 2, 2)
                      NOTE(review): presumably delta-feature options -- confirm.
            'channel': (int, default = 1)

        Note:
            The result of ``CMVN.params()`` is appended to the set before
            ``config`` is parsed.
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """

        # Ordered (name, default) table; registration order is kept as-is.
        defaults = (
            ("upper_frequency_limit", 0.0),
            ("lower_frequency_limit", 20.0),
            ("filterbank_channel_count", 23.0),
            ("window_length", 0.025),
            ("frame_length", 0.010),
            ("output_type", 1),
            ("snip_edges", 1),
            ("raw_energy", 1),
            ("preEph_coeff", 0.97),
            ("window_type", "povey"),
            ("remove_dc_offset", True),
            ("is_fbank", True),
            ("cepstral_lifter", 22.0),
            ("coefficient_count", 13),
            ("use_energy", True),
            ("dither", 0.0),
            ("delta_delta", False),
            ("order", 2),
            ("window", 2),
            ("channel", 1),
        )

        hparams = HParams(cls=cls)
        for hp_name, hp_default in defaults:
            hparams.add_hparam(hp_name, hp_default)

        # CMVN hyperparameters are attached before user overrides are parsed.
        hparams.append(CMVN.params())

        if config is None:
            return hparams

        hparams.parse(config, True)
        return hparams
Пример #14
0
    def params(cls, config=None):
        """Build the Fbank hyperparameters and apply optional overrides.

        Args:
            config: optional overrides; when not None it is handed to
                ``hparams.parse(config, True)`` once all defaults are in place.

            Registered directly by this method:

            'upper_frequency_limit': High cutoff frequency for mel bins (if <= 0, offset
                                      from Nyquist). (int, default = 0)
            'lower_frequency_limit': Low cutoff frequency for mel bins. (int, default = 60)
            'filterbank_channel_count': Number of triangular mel-frequency bins.
                                        (int, default = 40)
            'is_log10': If true, using log10 to fbank. If false, using loge.
                        (bool, default = False)
            'delta_delta', 'order', 'window': (defaults False, 2, 2)
                        NOTE(review): presumably delta-feature options -- confirm.

            Appended from ``Spectrum.params({"output_type": 1, "is_fbank": True})``.
            NOTE(review): the options below are presumably supplied by that call;
            their defaults are defined in Spectrum.params, not here -- confirm.

            'window_length': Window length in seconds.
            'frame_length': Hop length in seconds.
            'snip_edges': If 1, the last frame (shorter than window_length) will be
                          cutoff. If 2, 1 // 2 frame_length data will be padded
                          to data.
            'preEph_coeff': Coefficient for use in frame-signal preemphasis.
            'window_type': Type of window ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
            'remove_dc_offset': Subtract mean from waveform on each frame.
            'is_fbank': If true, compute power spectrum without frame energy.
                          If false, using the frame energy instead of the
                          square of the constant component of the signal.
                          (passed as True here)
            'output_type': If 1, return power spectrum. If 2, return log-power
                            spectrum. (passed as 1 here)
            'dither': Dithering constant (0.0 means no dither).
                      [add robust to training]

        Note:
            ``type`` is assigned "Fbank" after ``config`` is parsed, and
            ``channel`` is added after parsing as 1, then set to ``order + 1``
            when ``delta_delta`` is truthy.
            Return an object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """

        hparams = HParams(cls=cls)

        # Spectrum-level hyperparameters come first.
        spectrum_overrides = {"output_type": 1, "is_fbank": True}
        hparams.append(Spectrum.params(spectrum_overrides))

        # Fbank-specific entries followed by the delta entries, in the
        # original registration order.
        own_defaults = (
            ("upper_frequency_limit", 0),
            ("lower_frequency_limit", 60),
            ("filterbank_channel_count", 40),
            ("is_log10", False),
            ("delta_delta", False),
            ("order", 2),
            ("window", 2),
        )
        for hp_name, hp_default in own_defaults:
            hparams.add_hparam(hp_name, hp_default)

        if config is not None:
            hparams.parse(config, True)

        # Assigned after parsing, so this value always wins.
        hparams.type = "Fbank"

        # The channel count is derived from the (possibly overridden) delta options.
        hparams.add_hparam("channel", 1)
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams
Пример #15
0
    def params(cls, config=None):
        """
        Build the default hyperparameters and apply optional overrides.
        :param config: optional overrides; when not None it is passed to
            hparams.parse(config, True) after the defaults below are registered.
            --upper_frequency_limit		: High cutoff frequency for mel bins (if < 0, offset from
                                          Nyquist) (float, default = 0.0)
            --lower_frequency_limit		: Low cutoff frequency for mel bins (float, default = 20.0)
            --filterbank_channel_count	: Number of triangular mel-frequency bins.
                                          (float, default = 23.0)
            --window_length				: Window length in seconds. (float, default = 0.025)
            --frame_length				: Hop length in seconds. (float, default = 0.010)
            --output_type				: If 1, return power spectrum. If 2, return log-power
                                          spectrum. (int, default = 1)
            --snip_edges				: If 1, the last frame (shorter than window_length) will
                                          be cutoff. If 2, 1 // 2 frame_length data will be padded
                                          to data. (int, default = 1)
            --raw_energy				: If 1, compute frame energy before preemphasis and
                                          windowing. If 2, compute frame energy after
                                          preemphasis and windowing. (int, default = 1)
            --preEph_coeff			    : Coefficient for use in frame-signal preemphasis.
                                          (float, default = 0.97)
            --window_type				: Type of window ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                          (string, default = "povey")
            --remove_dc_offset		    : Subtract mean from waveform on each frame
                                          (bool, default = True)
            --is_fbank					: If true, compute power spectrum without frame energy. If
                                          false, using the frame energy instead of the square of the
                                          constant component of the signal. (bool, default = True)
            --cepstral_lifter           : Constant that controls scaling of MFCCs.
                                          (float, default = 22.0)
            --coefficient_count         : Number of cepstra in MFCC computation.
                                          (int, default = 13)
            --use_energy                : Use energy (not C0) in MFCC computation.
                                          (bool, default = True)
            --dither                    : (float, default = 0.0)
            --delta_delta, order, window: (defaults False, 2, 2)
                                          NOTE(review): presumably delta-feature options -- confirm.
            --channel                   : (int, default = 1)
        :return: An object of class HParams, which is a set of hyperparameters as name-value pairs.
            The result of CMVN.params() is appended to it before config is parsed.
        """

        hparams = HParams(cls=cls)

        # Mel filterbank range and size.
        hparams.add_hparam("upper_frequency_limit", 0.0)
        hparams.add_hparam("lower_frequency_limit", 20.0)
        hparams.add_hparam("filterbank_channel_count", 23.0)

        # Framing and spectrum options.
        hparams.add_hparam("window_length", 0.025)
        hparams.add_hparam("frame_length", 0.010)
        hparams.add_hparam("output_type", 1)
        hparams.add_hparam("snip_edges", 1)
        hparams.add_hparam("raw_energy", 1)
        hparams.add_hparam("preEph_coeff", 0.97)
        hparams.add_hparam("window_type", "povey")
        hparams.add_hparam("remove_dc_offset", True)
        hparams.add_hparam("is_fbank", True)

        # Cepstral options.
        hparams.add_hparam("cepstral_lifter", 22.0)
        hparams.add_hparam("coefficient_count", 13)
        hparams.add_hparam("use_energy", True)
        hparams.add_hparam("dither", 0.0)

        # Remaining entries: delta_delta / order / window and the channel count.
        hparams.add_hparam("delta_delta", False)
        hparams.add_hparam("order", 2)
        hparams.add_hparam("window", 2)
        hparams.add_hparam("channel", 1)

        # CMVN hyperparameters are attached before user overrides are parsed.
        hparams.append(CMVN.params())

        if config is None:
            return hparams

        hparams.parse(config, True)
        return hparams