def process(self, algorithm='as', reset=False):
    if algorithm not in ['as']:
        raise AcousticSimError('Spectrogram algorithm must be one of: as')
    if reset:
        self.data = {}
    if self.data:
        raise AcousticSimError(
            'Data already exists for this representation, use reset=True to generate new data.')
    self.pspec = file_to_powerspec(self.file_path, self.win_len, self.time_step)
    for k in self.pspec.keys():
        nfft = (len(self.pspec[k]) - 1) * 2
        # Convert power to decibels; np.spacing(1) (machine epsilon) avoids log of zero
        self.data[k] = 10 * np.log10(self.pspec[k] + np.spacing(1))
        # Frequency (in Hz) associated with each spectral bin
        self.freqs = (self.sr / nfft) * np.arange((nfft / 2) + 1)
def process(self, reset=False):
    if reset:
        self.data = {}
    if self.data:
        raise AcousticSimError(
            'Data already exists for this representation, use reset=True to generate new data.')
    self.data = file_to_amplitude_envelopes(self.file_path, self.num_bands,
                                            self.min_freq, self.max_freq)
def process(self, algorithm='lpc', executable_path=None, reset=False):
    if algorithm not in ['lpc', 'praat']:
        raise AcousticSimError('Formant algorithm must be one of: lpc, praat')
    if reset:
        self.data = {}
    if self.data:
        raise AcousticSimError(
            'Data already exists for this representation, use reset=True to generate new data.')
    if algorithm == 'lpc':
        data = file_to_formants(self.file_path, self.num_formants,
                                self.max_freq, self.win_len, self.time_step)
    else:
        data = file_to_formants_praat(self.file_path, executable_path,
                                      self.num_formants, self.max_freq,
                                      self.win_len, self.time_step)
    self.data = data
def process(self, algorithm='rastamat', executable_path=None, reset=False):
    if algorithm not in ['rastamat', 'praat']:
        raise AcousticSimError('MFCC algorithm must be one of: rastamat, praat')
    if reset:
        self.data = {}
    if self.data:
        raise AcousticSimError(
            'Data already exists for this representation, use reset=True to generate new data.')
    if algorithm == 'rastamat':
        data = file_to_mfcc(self.file_path, self.win_len, self.time_step,
                            self.min_freq, self.max_freq, self.num_filters,
                            self.num_coeffs, self.use_power, self.deltas)
    else:
        data = file_to_mfcc_praat(self.file_path, executable_path,
                                  self.win_len, self.time_step,
                                  self.min_freq, self.max_freq,
                                  self.num_filters, self.num_coeffs,
                                  self.use_power, self.deltas)
    self.data = data
def process(self, debug=True, signal=None, suppress_error=False):
    """Generate MFCCs in the style of HTK for this object's sound file
    or for a supplied signal.

    Parameters
    ----------
    debug : bool
        If True, return the power spectrum and auditory (mel-filtered)
        spectrum in addition to storing the MFCCs.
    signal : tuple, optional
        A (sample rate, preprocessed signal) tuple to process instead of
        loading the object's file path.
    suppress_error : bool
        If True, return silently instead of raising an error when neither
        a file path nor a signal is available.

    Returns
    -------
    tuple of dict, optional
        The power spectrum and auditory spectrum, keyed by frame time,
        returned only when debug is True.
    """
    if signal is None:
        if self._filepath is None:
            if suppress_error:
                return
            raise AcousticSimError('Must specify either a file path for the Mfcc '
                                   'object or a signal to process.')
        # Pre-emphasize the signal to boost higher frequencies
        self._sr, proc = preproc(self._filepath, alpha=0.97)
    else:
        self._sr, proc = signal
    self._duration = len(proc) / self._sr

    # HTK-style sinusoidal liftering matrix for the cepstral coefficients
    L = 22
    n = arange(self._num_filters)
    lift = 1 + (L / 2) * sin(pi * n / L)
    lift = diag(lift)

    pspec = to_powerspec(proc, self._sr, self._win_len, self._time_step)
    filterbank = self.filter_bank((len(next(iter(pspec.values()))) - 1) * 2)
    self._rep = dict()
    aspec = dict()
    for k in pspec:
        # Apply the mel filter bank to the power spectrum for this frame
        filteredSpectrum = dot(sqrt(pspec[k]), filterbank) ** 2
        aspec[k] = filteredSpectrum
        # Take the DCT of the filtered spectrum and lifter the coefficients
        dctSpectrum = dct_spectrum(filteredSpectrum)
        dctSpectrum = dot(dctSpectrum, lift)
        if not self._use_power:
            dctSpectrum = dctSpectrum[1:]
        self._rep[k] = dctSpectrum[:self._num_coeffs]
    if self._deltas:
        keys = sorted(self._rep.keys())
        # First pass: append delta coefficients (zeros at the edge frames)
        for i, k in enumerate(keys):
            if i == 0 or i == len(keys) - 1:
                self._rep[k] = array(list(self._rep[k]) + [0 for x in range(self._num_coeffs)])
            else:
                deltas = self._rep[keys[i + 1]][:self._num_coeffs] - self._rep[keys[i - 1]][:self._num_coeffs]
                self._rep[k] = array(list(self._rep[k]) + list(deltas))
        # Second pass: append delta-delta coefficients computed from the deltas
        for i, k in enumerate(keys):
            if i == 0 or i == len(keys) - 1:
                self._rep[k] = array(list(self._rep[k]) + [0 for x in range(self._num_coeffs)])
            else:
                deltas = self._rep[keys[i + 1]][self._num_coeffs:self._num_coeffs * 2] - \
                         self._rep[keys[i - 1]][self._num_coeffs:self._num_coeffs * 2]
                self._rep[k] = array(list(self._rep[k]) + list(deltas))
    if debug:
        return pspec, aspec
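A minimal usage sketch for the method above. The Mfcc class name and the constructor arguments shown here are assumptions made for illustration; only the process() signature and the _rep attribute come from the code itself.

# Hypothetical usage; the constructor arguments are assumptions for illustration only.
mfcc = Mfcc('/path/to/sound.wav', num_coeffs=20, num_filters=26,
            win_len=0.025, time_step=0.01, use_power=False, deltas=True)
pspec, aspec = mfcc.process(debug=True)   # debug=True also returns the power and auditory spectra
frames = sorted(mfcc._rep.keys())         # frame time points
first_frame = mfcc._rep[frames[0]]        # MFCC (and delta) values for the first frame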
def analyze_directories(directories, **kwargs):
    """
    Analyze many directories.

    Parameters
    ----------
    directories : list of str
        List of fully specified paths to the directories to be analyzed.
    rep : {'envelopes', 'mfcc'}, optional
        The type of representation to convert the wav files into before
        comparing for similarity. Amplitude envelopes will be computed
        when 'envelopes' is specified, and MFCCs will be computed when
        'mfcc' is specified (default).
    match_function : {'dtw', 'xcorr'}, optional
        How similarity/distance will be calculated. Defaults to 'dtw' to
        use Dynamic Time Warping (can be slower) to compute distance.
        Cross-correlation can be specified with 'xcorr', which computes
        distance as the inverse of a maximum cross-correlation value
        between 0 and 1.
    num_filters : int, optional
        The number of frequency filters to use when computing
        representations. Defaults to 8 for amplitude envelopes and 26
        for MFCCs.
    num_coeffs : int, optional
        The number of coefficients to use for MFCCs (not used for
        amplitude envelopes). Default is 20, which captures
        speaker-specific information, whereas 12 would be more
        speaker-independent.
    freq_lims : tuple, optional
        A tuple of the minimum frequency and maximum frequency in Hertz
        to use for computing representations. Defaults to (80, 7800)
        following Lewandowski's dissertation (2012).
    output_sim : bool, optional
        If True (default), the function will return similarities (inverse
        distance). If False, distance measures will be returned instead.
    """
    stop_check = kwargs.get('stop_check', None)
    call_back = kwargs.get('call_back', None)
    files = []
    kwargs['attributes'] = dict()
    if call_back is not None:
        call_back('Mapping directories...')
        call_back(0, len(directories))
        cur = 0
    for d in directories:
        if not os.path.isdir(d):
            continue
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            cur += 1
            if cur % 3 == 0:
                call_back(cur)
        files += [os.path.join(d, x) for x in os.listdir(d)
                  if x.lower().endswith('.wav')]
        att_path = os.path.join(d, 'attributes.txt')
        if os.path.exists(att_path):
            kwargs['attributes'].update(load_attributes(att_path))
    if len(files) == 0:
        raise AcousticSimError("The directories specified do not contain any wav files")
    if call_back is not None:
        call_back('Mapping directories...')
        call_back(0, len(files) * len(files))
        cur = 0
    path_mapping = list()
    for x in files:
        for y in files:
            if stop_check is not None and stop_check():
                return
            if call_back is not None:
                cur += 1
                if cur % 20 == 0:
                    call_back(cur)
            if not x.lower().endswith('.wav'):
                continue
            if not y.lower().endswith('.wav'):
                continue
            if x == y:
                continue
            path_mapping.append((x, y))
    result = acoustic_similarity_mapping(path_mapping, **kwargs)
    return result
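A short usage sketch of analyze_directories based on the keyword arguments documented in the docstring above; the directory paths are placeholders, and the exact structure of the returned result depends on acoustic_similarity_mapping.

# Hypothetical call; the directory paths are placeholders.
result = analyze_directories(
    ['/data/speaker1', '/data/speaker2'],
    rep='mfcc',               # compare MFCC representations
    match_function='dtw',     # distance computed with dynamic time warping
    num_filters=26,
    num_coeffs=20,
    freq_lims=(80, 7800),
    output_sim=True,          # return similarities rather than distances
)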