def process(self, algorithm='as', reset=False):
    """Compute the log-scaled spectrogram of ``self.file_path``.

    The power spectrum returned by ``file_to_powerspec`` is kept in
    ``self.pspec``; ``self.data`` receives ``10 * log10`` of every frame
    and ``self.freqs`` the frequency axis derived from ``self.sr`` and the
    frame length.

    Parameters
    ----------
    algorithm : str
        Spectrogram algorithm to use.  Only ``'as'`` is accepted.
    reset : bool
        If true, discard any previously computed data before processing.

    Raises
    ------
    AcousticSimError
        If `algorithm` is not supported, if data already exists and
        `reset` is false, or if the power spectrum contains no frames.
    """
    if algorithm not in ['as']:
        raise AcousticSimError('Spectrogram algorithm must be one of: as')
    if reset:
        self.data = {}
    if self.data:
        raise AcousticSimError('Data already exists for this representation, use reset=True to generate new data.')

    self.pspec = file_to_powerspec(self.file_path, self.win_len, self.time_step)
    if not self.pspec:
        # Without at least one frame there is no frame length to derive the
        # FFT size from; fail with a clear error instead of an unbound name.
        raise AcousticSimError('No frames were produced for this file, cannot compute a spectrogram.')

    # Smallest positive spacing, added so that log10 never sees an exact zero.
    eps = np.spacing(1)
    num_bins = 0
    for k, frame in self.pspec.items():
        self.data[k] = 10 * np.log10(frame + eps)
        num_bins = len(frame)

    # Each frame holds nfft/2 + 1 bins, so the FFT size follows from its length.
    nfft = (num_bins - 1) * 2
    self.freqs = (self.sr / nfft) * np.arange((nfft / 2) + 1)
Пример #2
0
 def process(self, reset=False):
     """Compute the amplitude envelopes of ``self.file_path``.

     The result of ``file_to_amplitude_envelopes`` is stored in
     ``self.data``.

     Parameters
     ----------
     reset : bool
         If true, discard any previously computed data before processing.

     Raises
     ------
     AcousticSimError
         If data already exists and `reset` is false.
     """
     if reset:
         self.data = {}
     already_processed = bool(self.data)
     if already_processed:
         message = 'Data already exists for this representation, use reset=True to generate new data.'
         raise AcousticSimError(message)
     envelopes = file_to_amplitude_envelopes(
         self.file_path, self.num_bands, self.min_freq, self.max_freq)
     self.data = envelopes
Пример #3
0
 def process(self, algorithm='lpc', executable_path=None, reset=False):
     """Compute formant tracks for ``self.file_path`` into ``self.data``.

     Parameters
     ----------
     algorithm : str
         ``'lpc'`` calls ``file_to_formants``; ``'praat'`` calls
         ``file_to_formants_praat``.
     executable_path : str, optional
         Passed to ``file_to_formants_praat``; unused for ``'lpc'``.
     reset : bool
         If true, discard any previously computed data before processing.

     Raises
     ------
     AcousticSimError
         If `algorithm` is not supported, or if data already exists and
         `reset` is false.
     """
     supported = ['lpc', 'praat']
     if algorithm not in supported:
         raise AcousticSimError('Formant algorithm must be one of: lpc, praat')

     if reset:
         self.data = {}
     if self.data:
         message = 'Data already exists for this representation, use reset=True to generate new data.'
         raise AcousticSimError(message)

     if algorithm == 'lpc':
         formants = file_to_formants(
             self.file_path, self.num_formants, self.max_freq,
             self.win_len, self.time_step)
     else:
         formants = file_to_formants_praat(
             self.file_path, executable_path, self.num_formants,
             self.max_freq, self.win_len, self.time_step)
     self.data = formants
Пример #4
0
def analyze_directories(directories, **kwargs):
    """Compare every wav file in `directories` against every other one.

    Files ending in ``.wav`` (case-insensitive) are collected from each
    existing directory, every ordered pair of distinct paths is built, and
    the pairs are handed to ``acoustic_similarity_mapping``.

    Parameters
    ----------
    directories : list of str
        Paths of the directories to scan.  Entries that are not
        directories are skipped.
    stop_check : callable, optional
        Called with no arguments before each directory and each pair; a
        true result aborts the analysis.
    call_back : callable, optional
        Progress reporter.  Called with a message string, with
        ``(0, total)`` at the start of each phase, and with the current
        count periodically.
    **kwargs
        All keyword arguments (including the two above) are forwarded to
        ``acoustic_similarity_mapping``, together with an ``attributes``
        dict merged from any ``attributes.txt`` found in the directories.

    Returns
    -------
    The result of ``acoustic_similarity_mapping``, or ``None`` if
    `stop_check` requested an abort.

    Raises
    ------
    AcousticSimError
        If no wav files were found.
    """
    stop_check = kwargs.get('stop_check', None)
    call_back = kwargs.get('call_back', None)

    files = []
    kwargs['attributes'] = dict()

    if call_back is not None:
        call_back('Mapping directories...')
        call_back(0, len(directories))
    cur = 0
    for d in directories:
        if not os.path.isdir(d):
            continue
        if stop_check is not None and stop_check():
            return None
        if call_back is not None:
            cur += 1
            if cur % 3 == 0:
                call_back(cur)

        files.extend(os.path.join(d, x) for x in os.listdir(d)
                     if x.lower().endswith('.wav'))

        att_path = os.path.join(d, 'attributes.txt')
        if os.path.exists(att_path):
            kwargs['attributes'].update(load_attributes(att_path))
    if not files:
        raise AcousticSimError("The directories specified do not contain any wav files")

    if call_back is not None:
        call_back('Mapping directories...')
        call_back(0, len(files) * len(files))
    cur = 0
    path_mapping = []
    for x in files:
        for y in files:
            if stop_check is not None and stop_check():
                return None
            if call_back is not None:
                cur += 1
                if cur % 20 == 0:
                    call_back(cur)
            # `files` only ever holds .wav paths, so the suffix does not need
            # re-checking per pair; only self-comparisons are dropped.
            if x != y:
                path_mapping.append((x, y))

    return acoustic_similarity_mapping(path_mapping, **kwargs)
Пример #5
0
 def process(self, algorithm='rastamat', executable_path=None, reset=False):
     """Compute MFCCs for ``self.file_path`` and store them in ``self.data``.

     Parameters
     ----------
     algorithm : str
         ``'rastamat'`` calls ``file_to_mfcc``; ``'praat'`` calls
         ``file_to_mfcc_praat``.
     executable_path : str, optional
         Passed to ``file_to_mfcc_praat``; unused for ``'rastamat'``.
     reset : bool
         If true, discard any previously computed data before processing.

     Raises
     ------
     AcousticSimError
         If `algorithm` is not supported, or if data already exists and
         `reset` is false.
     """
     if algorithm not in ['rastamat', 'praat']:
         # The message names the options this method actually accepts.
         raise AcousticSimError(
             'MFCC algorithm must be one of: rastamat, praat')
     if reset:
         self.data = {}
     if self.data:
         raise AcousticSimError(
             'Data already exists for this representation, use reset=True to generate new data.'
         )
     if algorithm == 'rastamat':
         data = file_to_mfcc(self.file_path, self.win_len, self.time_step,
                             self.min_freq, self.max_freq, self.num_filters,
                             self.num_coeffs, self.use_power, self.deltas)
     else:
         data = file_to_mfcc_praat(self.file_path, executable_path,
                                   self.win_len, self.time_step,
                                   self.min_freq, self.max_freq,
                                   self.num_filters, self.num_coeffs,
                                   self.use_power, self.deltas)
     self.data = data
Пример #6
0
    def process(self,debug = True, signal = None, suppress_error = False):
        """Compute the cepstral representation and store it in ``self._rep``.

        The input is either the file at ``self._filepath`` (loaded through
        ``preproc`` with ``alpha=0.97``) or the `signal` argument.  Besides
        ``self._rep`` (a dict with one coefficient vector per key of the
        power spectrum), this sets ``self._sr`` and ``self._duration``.
        The window length, time step, number of filters, number of
        coefficients, power and delta options are all read from the
        instance, not passed in.

        Parameters
        ----------
        debug : bool
            If true (the default), return the intermediate spectra.
        signal : tuple, optional
            Pair unpacked as ``(sr, samples)``.  When given, it is used
            instead of reading ``self._filepath``.
        suppress_error : bool
            If true, return without processing when neither a signal nor
            a filepath is available, instead of raising.

        Returns
        -------
        tuple or None
            ``(pspec, aspec)`` when `debug` is true, where ``pspec`` is the
            value returned by ``to_powerspec`` and ``aspec`` is a dict with
            the same keys holding the filter-bank output of each frame.
            ``None`` otherwise.

        Raises
        ------
        AcousticSimError
            If `signal` is None, ``self._filepath`` is None and
            `suppress_error` is false.

        """
        if signal is None:
            if self._filepath is None:
                if suppress_error:
                    return
                raise(AcousticSimError('Must specify a either a filepath for the Mfcc object or a signal to process.'))
            # NOTE(review): alpha presumably is a pre-emphasis coefficient -- confirm in preproc.
            self._sr, proc = preproc(self._filepath,alpha=0.97)
        else:
            self._sr, proc = signal
        self._duration = len(proc) / self._sr

        # Sinusoidal lifter weights, one per filter: 1 + (L/2) * sin(pi*n/L),
        # placed on a diagonal matrix so they can be applied with dot() below.
        L = 22
        n = arange(self._num_filters)
        lift = 1+ (L/2)*sin(pi*n/L)
        lift = diag(lift)

        pspec = to_powerspec(proc,self._sr,self._win_len,self._time_step)

        # The argument is (bins in the first frame - 1) * 2.
        # NOTE(review): presumably the FFT size expected by filter_bank -- confirm.
        filterbank = self.filter_bank((len(next(iter(pspec.values())))-1) * 2)

        #self._rep = zeros((num_frames,self._num_coeffs))
        self._rep = dict()
        #aspec = zeros((num_frames,self._num_filters))
        aspec = dict()
        for k in pspec:
            # Filter-bank output for this frame, kept in aspec for debugging.
            filteredSpectrum = dot(sqrt(pspec[k]), filterbank)**2
            aspec[k] = filteredSpectrum
            dctSpectrum = dct_spectrum(filteredSpectrum)
            dctSpectrum = dot(dctSpectrum , lift)
            # Unless use_power is set, the first coefficient is dropped.
            if not self._use_power:
                dctSpectrum = dctSpectrum[1:]
            self._rep[k] = dctSpectrum[:self._num_coeffs]
        if self._deltas:
            keys = sorted(self._rep.keys())
            # First pass: append to every frame the difference between the
            # first num_coeffs values of the next and previous frames (in
            # sorted key order).  The first and last frames get zeros.
            for i,k in enumerate(keys):
                if i == 0 or i == len(self._rep.keys()) - 1:
                    self._rep[k] = array(list(self._rep[k]) + [0 for x in range(self._num_coeffs)])
                else:
                    deltas = self._rep[keys[i+1]][:self._num_coeffs] - self._rep[keys[i-1]][:self._num_coeffs]
                    self._rep[k] = array(list(self._rep[k]) + list(deltas))
            # Second pass: the same neighbour difference, taken over the
            # [num_coeffs:2*num_coeffs] slice that the first pass appended.
            for i,k in enumerate(keys):
                if i == 0 or i == len(self._rep.keys()) - 1:
                    self._rep[k] = array(list(self._rep[k]) + [0 for x in range(self._num_coeffs)])
                else:
                    deltas = self._rep[keys[i+1]][self._num_coeffs:self._num_coeffs*2] - self._rep[keys[i-1]][self._num_coeffs:self._num_coeffs*2]
                    self._rep[k] = array(list(self._rep[k]) + list(deltas))

        # With debug off the method falls through and returns None.
        if debug:
            return pspec,aspec
Пример #7
0
def analyze_directories(directories, **kwargs):
    """
    Analyze many directories.

    Every file ending in ``.wav`` (case-insensitive) in the given
    directories is paired with every other such file, and the ordered
    pairs are passed to ``acoustic_similarity_mapping`` together with all
    keyword arguments.

    Parameters
    ----------
    directories : list of str
        List of fully specified paths to the directories to be analyzed.
        Entries that are not directories are skipped.
    rep : {'envelopes','mfcc'}, optional
        The type of representation to convert the wav files into before
        comparing for similarity.  Amplitude envelopes will be computed
        when 'envelopes' is specified, and MFCCs will be computed when
        'mfcc' is specified (default).
    match_function : {'dtw', 'xcorr'}, optional
        How similarity/distance will be calculated.  Defaults to 'dtw' to
        use Dynamic Time Warping (can be slower) to compute distance.
        Cross-correlation can be specified with 'xcorr', which computes
        distance as the inverse of a maximum cross-correlation value
        between 0 and 1.
    num_filters : int, optional
        The number of frequency filters to use when computing representations.
        Defaults to 8 for amplitude envelopes and 26 for MFCCs.
    num_coeffs : int, optional
        The number of coefficients to use for MFCCs (not used for
        amplitude envelopes).  Default is 20, which captures speaker-
        specific information, whereas 12 would be more speaker-independent.
    freq_lims : tuple, optional
        A tuple of the minimum frequency and maximum frequency in Hertz to use
        for computing representations.  Defaults to (80, 7800) following
        Lewandowski's dissertation (2012).
    output_sim : bool, optional
        If true (default), the function will return similarities (inverse distance).
        If false, distance measures will be returned instead.
    stop_check : callable, optional
        Called with no arguments before each directory and each file pair;
        a true result aborts the analysis.
    call_back : callable, optional
        Progress reporter.  Called with a message string, with
        ``(0, total)`` at the start of each phase, and with the current
        count periodically.

    Returns
    -------
    The result of ``acoustic_similarity_mapping``, or ``None`` if
    `stop_check` requested an abort.

    Raises
    ------
    AcousticSimError
        If the directories contain no wav files.

    """
    stop_check = kwargs.get('stop_check', None)
    call_back = kwargs.get('call_back', None)

    files = []
    # Attributes from every directory's attributes.txt are merged and
    # forwarded to the mapping function through kwargs.
    kwargs['attributes'] = dict()

    if call_back is not None:
        call_back('Mapping directories...')
        call_back(0, len(directories))
    cur = 0
    for d in directories:
        if not os.path.isdir(d):
            continue
        if stop_check is not None and stop_check():
            return None
        if call_back is not None:
            cur += 1
            if cur % 3 == 0:
                call_back(cur)

        files.extend(
            os.path.join(d, x) for x in os.listdir(d)
            if x.lower().endswith('.wav')
        )

        att_path = os.path.join(d, 'attributes.txt')
        if os.path.exists(att_path):
            kwargs['attributes'].update(load_attributes(att_path))
    if not files:
        raise AcousticSimError(
            "The directories specified do not contain any wav files")

    if call_back is not None:
        call_back('Mapping directories...')
        call_back(0, len(files) * len(files))
    cur = 0
    path_mapping = []
    for x in files:
        for y in files:
            if stop_check is not None and stop_check():
                return None
            if call_back is not None:
                cur += 1
                if cur % 20 == 0:
                    call_back(cur)
            # Every entry of `files` already ends in .wav, so only
            # self-comparisons need to be filtered out here.
            if x != y:
                path_mapping.append((x, y))

    return acoustic_similarity_mapping(path_mapping, **kwargs)