def parse_header(header_str):
    header = Header()
    header.tex_width = pylab.fromstring(header_str[4:8], "int32")[0]
    header.tex_height = pylab.fromstring(header_str[8:12], "int32")[0]
    header.start_char = pylab.fromstring(header_str[12:16], "int32")[0]
    header.end_char = pylab.fromstring(header_str[16:20], "int32")[0]
    return header
def get_mfcc(filename, path, label):
    wav = wave.open(filename, 'r')
    frames = wav.readframes(-1)
    audio = np.asarray(pylab.fromstring(frames, 'Int16')) / np.max(
        np.abs(np.asarray(pylab.fromstring(frames, 'Int16'))))
    sample_rate = wav.getframerate()
    wav.close()
    n = len(audio)
    audio = audio[n - 6688:]
    n = 6688
    total_ts_sec = n / sample_rate
    FFT_size = 2048
    audio_framed = frame_audio(audio, FFT_size, FFT_size // 2)
    window = create_hanning_window(FFT_size)
    audio_win = audio_framed * window
    dim = (FFT_size // 2 + 1, audio_win.T.shape[1])
    audio_power = np.abs(do_fft(audio_win, FFT_size, dim))
    audio_power = audio_power * audio_power
    mel_freqs = get_filter_points(8000, 10, FFT_size)
    x = (1 + FFT_size) / 16000
    filter_points = np.floor(x * mel_freqs).astype(int)
    filters = get_filters(filter_points, FFT_size)
    enorm = 2.0 / (mel_freqs[2:10 + 2] - mel_freqs[:10])
    filters *= enorm[:, np.newaxis]
    audio_powerT = audio_power.T
    audio_filtered = filters.dot(audio_powerT)
    audio_log = 10.0 * np.log10(audio_filtered)
    audio_log.shape
    dct_filter_num = 40
    samples = np.arange(1, 2 * 10, 2) * np.pi / (2.0 * 10)
    # dct_filters = dct(samples)
    dct_filter = dct_inbuilt(audio_log)
    cepstral_coefficients = dct_filter.T.dot(audio_log)
    print("label:", label)
    spath = path + "mfcc/" + label + "/" + ntpath.basename(filename)[:-4] + ".csv"
    # np.savetxt(spath, cepstral_coefficients, delimiter=',')
    fg = plt.figure(figsize=(15, 5))
    ender = len(audio) / sample_rate
    incer = ender / (len(audio) - 1)
    xps = np.arange(0, ender + incer / 2, incer)
    plt.plot(np.linspace(0, len(audio) / sample_rate, num=len(audio)), audio)
    # plt.imshow(cepstral_coefficients, aspect='auto', origin='lower')
    spath = path + "plot/" + label + "/" + ntpath.basename(filename)[:-4] + ".png"
    # fg.savefig(spath, bbox_inches='tight')
    return cepstral_coefficients
def wavread(name):
    file = wave.open(name, 'r')
    # (nchannels, sampwidth in bytes, sampling frequency, nframes, comptype, compname)
    [Channels, Bytes, Fs, Frames, Compress, CompressName] = file.getparams()
    Data = file.readframes(Frames)
    Bits = Bytes * 8
    if Bits == 16:    # 16 bits per sample
        Data = fromstring(Data, 'h') / 32767.0        # -1..1 values, Int16 because Bits=2x8=16
    elif Bits == 8:   # 8 bits per sample
        Data = (fromstring(Data, 'b') / 128.0) - 1.0  # -1..1 values
    else:
        print "Error. Sorry, this wavread function only supports 8- or 16-bit wav files."
        return -1, -1, -1
    file.close()
    # print "Fs: ", Fs, "\nBits: ", Bits, "\nChannels: ", Channels
    return Data, Fs, Bits
def get_info(au_file, name):
    file = sunau.open("genres/" + name + "/" + au_file, 'r')
    frames = file.readframes(file.getnframes())
    sound_info = pylab.fromstring(frames, 'Int16')
    frame_rate = file.getframerate()
    file.close()
    return sound_info, frame_rate
def get_wav_info(self, wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
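# A minimal sketch (not from the original sources): NumPy has deprecated
# fromstring for binary input, so the same decoding can be done with
# numpy.frombuffer. The function name below is illustrative, assuming Python 3.
import wave

import numpy as np


def get_wav_info_frombuffer(wav_file):
    # Same behavior as get_wav_info above, without the deprecated fromstring call.
    with wave.open(wav_file, 'r') as wav:
        frames = wav.readframes(-1)
        frame_rate = wav.getframerate()
    # frombuffer returns a read-only view; copy() makes it writable,
    # matching what fromstring used to return.
    sound_info = np.frombuffer(frames, dtype=np.int16).copy()
    return sound_info, frame_rate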
def getSampleInfo(sample):
    wav = wave.open(sample, 'r')
    frames = wav.readframes(-1)
    soundInfo = pylab.fromstring(frames, 'Int16')
    frameRate = wav.getframerate()
    wav.close()
    return soundInfo, frameRate
def show_single_wav(speech, name):
    '''
    Plot the waveforms of noise, clean speech, noisy speech, and enhanced speech.
    Amplitude range: -0.5 to 0.5.
    '''
    spf = wave.open(speech, 'r')
    sound_info = spf.readframes(-1)
    sound_info1 = plt.fromstring(sound_info, "Int16")
    fig = plt.figure()
    fig1 = fig.add_subplot(111)
    fig1.plot(sound_info1)
    scale_x = 16000
    scale_y1 = 1000
    ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x / scale_x))
    ticks_y1 = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x / scale_y1))
    fig1.xaxis.set_major_formatter(ticks_x)
    fig1.yaxis.set_major_formatter(ticks_y1)
    fig1.tick_params(labelsize=18)
    fig1.set_xlabel('T / s', fontsize=18)
    fig1.set_ylabel('Amp', fontsize=18)
    plt.title(name)
    plt.show()
    fig.savefig('pictures/' + name + '-wav.png')
    spf.close()
def show_single_spec(speech, name):
    '''
    :param speech: audio in wav format
    :return: spectrogram image
    '''
    spf1 = wave.open(speech, 'r')
    sound_info1 = spf1.readframes(-1)
    sound_info1 = plt.fromstring(sound_info1, 'Int16')
    fig = plt.figure()
    fig1 = fig.add_subplot(111)
    f1 = spf1.getframerate()
    # print(len(sound_info1))
    sound_info1 = sound_info1[1817:26054]
    spectrogram1 = plt.specgram(sound_info1, NFFT=256, Fs=f1, cmap='rainbow',
                                scale_by_freq=True, sides='default')
    scale_y = 1000
    ticks_y = ticker.FuncFormatter(lambda y, pos: '{0:g}'.format(y / scale_y))
    fig1.yaxis.set_major_formatter(ticks_y)
    fig1.tick_params(labelsize=24)
    fig1.set_xlabel('T / s', fontsize=24)
    fig1.set_ylabel('F / kHz', fontsize=24)
    # plt.title(name)
    plt.show()
    fig.savefig(name + '-spec.png')
    spf1.close()
def get_wave_info(self, wav_file):
    self.wav = wave.open(wav_file, 'r')
    self.frames = self.wav.readframes(-1)
    self.sound_info = pl.fromstring(self.frames, 'int16')
    self.frame_rate = self.wav.getframerate()
    self.wav.close()
    return self.sound_info, self.frame_rate
def get_soundfile(filename, dplot=1):
    """ reads in the audio file, optionally plots """
    # open file
    wf = aifc.open(filename, 'rb')
    # read data
    data = wf.readframes(wf.getnframes())
    srate = wf.getframerate()
    # close file
    wf.close()
    # format data, time
    data_int = pylab.fromstring(data, dtype=np.int32)
    n = data_int.size
    time = np.arange(n) / float(srate)
    time = time * 1000  # ms
    # plot sound file
    if dplot:
        fig = plt.figure()
        ax1 = fig.add_subplot(1, 1, 1)
        ax1.plot(time, data_int)
        ax1.set_xlim(min(time), max(time))
        ax1.set_xlabel("Time [ms]")
        ax1.set_ylabel("Amplitude")
    return data_int, srate
def read_file(wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    file_series = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    wav.close()
    return file_series, frame_rate
def get_wav_info(audio_path, wav):
    wav = wave.open(audio_path + '/' + wav, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
def get_info(wav_file: bytes) -> tuple:
    """ Get audio data from the given file path """
    # Ensure this is actually bytes
    if not isinstance(wav_file, bytes):
        wav_file = wav_file.encode("utf-8")
    if wav_file.split(b".")[-1] == b"mp3":
        # This is an MP3 file, not a wave file
        sound = AudioSegment.from_mp3(wav_file.decode("utf-8"))
        frames = sound._data
        frame_rate = sound.frame_rate
    else:
        # Open the wave file
        try:
            wav = wave.open(wav_file.decode("utf-8"), "r")
            frames = wav.readframes(-1)
            frame_rate = wav.getframerate()
            wav.close()
        except OSError:
            # If we fail to read this, then stop
            return None, None
    sound_info = pylab.fromstring(frames, "int16")
    return sound_info, frame_rate
def generateSpectogram(song):
    """
    Generates a spectrogram for a song.

    Args:
        song: AudioSegment of the song for which the spectrogram should be created.

    Returns:
        Spectrum from matplotlib.specgram with the scale changed to logarithmic.
    """
    song = song.set_channels(1)  # sample only one channel
    # song = song.set_frame_rate(FREQ)
    # song = song.low_pass_filter(FREQ/2)  # don't use too high frequencies
    frameRate = song.frame_rate
    frames = pylab.fromstring(song.raw_data, 'Int16')
    # generate spectrogram
    spectrum, freqs, t = mlab.specgram(frames, Fs=frameRate, NFFT=DATA_POINTS,
                                       noverlap=NUM_OVER)
    # change to log scale
    spectrum = 10 * np.log2(spectrum)
    spectrum[spectrum == -np.inf] = 0
    return spectrum
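# A hypothetical usage sketch for generateSpectogram above, assuming pydub is
# installed and that the module-level constants DATA_POINTS and NUM_OVER are
# defined; 'song.mp3' is a placeholder path.
from pydub import AudioSegment

song = AudioSegment.from_mp3('song.mp3')
spectrum = generateSpectogram(song)
print(spectrum.shape)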
def recode(self):
    audio = pyaudio.PyAudio()
    stream = audio.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        input_device_index=self.DEVICE_INDEX,
                        frames_per_buffer=self.CHUNK)
    frames = []
    data = stream.read(self.CHUNK, exception_on_overflow=False)
    check = pylab.fromstring(data, 'int16')
    if (sum(abs(check)) / len(check)) < self.Recode_Value:
        stream.stop_stream()
        stream.close()
        audio.terminate()
        return False
    for i in range(1, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
        data = stream.read(self.CHUNK, exception_on_overflow=False)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    audio.terminate()
    waveFile = wave.open(self.WAVE_OUTPUT_FILENAME, 'wb')
    waveFile.setnchannels(self.CHANNELS)
    waveFile.setsampwidth(audio.get_sample_size(self.FORMAT))
    waveFile.setframerate(self.RATE)
    waveFile.writeframes(b''.join(frames))
    waveFile.close()
    return True
def get_wav_info(wav_file):
    wav = wave.open(wav_file, "r")
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, "Int16")
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
def get_wav_info(wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
def get_wav_info(WAVE_OUTPUT_FILENAME):
    wav = wave.open(WAVE_OUTPUT_FILENAME, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
def get_wav_info(wavFilename):
    wav = wave.open(wavFilename, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'int16')
    frame_rate = wav.getframerate()
    wav.close()
    print("Frame rate: " + str(frame_rate))
    return sound_info, frame_rate
def get_wav_info(wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate

# graph_spectrogram('C:\\TestData\\test.wav')
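# The commented-out call above refers to a graph_spectrogram helper that is not
# shown here. A minimal sketch of such a helper, built on get_wav_info and
# pylab.specgram, might look like this (the name and the save path are assumptions).
import pylab


def graph_spectrogram(wav_file):
    # Read the samples, draw a spectrogram, and save it next to the wav file.
    sound_info, frame_rate = get_wav_info(wav_file)
    pylab.figure(figsize=(8, 4))
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.xlabel('Time (s)')
    pylab.ylabel('Frequency (Hz)')
    pylab.savefig(wav_file.replace('.wav', '.png'))
    pylab.close()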
def get_wav_info(wav_file):
    wav = wave.open(wav_file)
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'int16')
    frame_rate = wav.getframerate()
    wav.close()
    print(wav_file)
    print("File opened")
    return sound_info, frame_rate
def open_tune(path_to_tune, duration=-1):
    tune = wave.open(path_to_tune, 'r')
    if duration == -1:
        frames = tune.readframes(-1)
    else:
        frames = tune.readframes(duration * SAMP_RATE)
    freqs = pylab.fromstring(frames, 'Int16')
    tune.close()
    return freqs
def simulation(N, k, t, p, u0, fitness, f_migrant, p_migrant, freq_dependent):
    # compute relative fitness
    fitness = pylab.fromstring(fitness, sep=',')
    print('relative fitness [AA,Aa,aa]=\t', fitness)
    # get mutation rate
    u = pylab.fromstring(u0, sep=',')
    if len(u) == 1:
        u = pylab.array([u[0], u[0]])
    print('mutation rate [A2a,a2A] =', u)
    # run simulation
    freq_matrix = pylab.zeros((k, t + 1, 3))
    for i in range(k):
        freq_matrix[i, ] = diploid_simulation(N, p, u, fitness, f_migrant,
                                              p_migrant, t, freq_dependent)
    return freq_matrix
def get(self, request, start=0, stop=-1):
    spf = wave.open(StringIO(self.sound), 'r')
    framerate = float(spf.getframerate())
    start = float(start)
    stop = float(stop)
    fig = figure()
    ax = fig.add_subplot(111, frameon=False)
    window_size = 0.005
    NFFT = 128  # framerate*window_size
    frames = fromstring(spf.readframes(-1), 'Int16')
    duration = len(frames)/spf.getnframes()*framerate
    powers, freqs, times, image = ax.specgram(
        frames, Fs=framerate, NFFT=NFFT, noverlap=NFFT-1,
        #window=lambda x, alpha=2.5: exp(-0.5*(alpha * linspace(-(len(x)-1)/2., (len(x)-1)/2., len(x)) /(len(x)/2.))**2.)*x,
        cmap=cm.gray_r
    )
    dyn_range = 70
    preemph_start = 50
    preemph_boost = 6
    max_frame = -200
    c = log(2, 10)
    #for f in range(len(powers)):
    #    for t in range(len(powers[f])):
    #        power = powers[f][t]
    #        powers[f][t] = 10*log(abs(power), 10)+(log(freqs[f]/preemph_start, 2)*preemph_boost if freqs[f] >= preemph_start else 0) if power != 0 else -200
    #        max_frame = max(max_frame, powers[f][t])
    #max_frame = 100
    #for f in range(len(powers)):
    #    for t in range(len(powers[f])):
    #        powers[f][t] = 1.0-(max_frame-powers[f][t])/dyn_range if powers[f][t] > max_frame-dyn_range else 0
    #ax.imshow(powers, origin='lower', aspect='auto', cmap=cm.gray_r)
    ax.set_xlim(start, stop if stop != -1 else spf.getnframes()/framerate)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, wspace=0, hspace=0)
    fig.set_size_inches(1.78, 0.5)
    output = StringIO()
    fig.savefig(output, format='png', dpi=600)
    spf.close()
    return HTTPResponse(HTTPPreamble(headers=PNG_HEADERS.copy()),
                        body=output.getvalue())
def get_wav_info(wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    number_of_channels = wav.getnchannels()
    # print len(sound_info)
    # print frame_rate
    # print number_of_channels
    wav.close()
    return sound_info, frame_rate
def get_wav_info(file_path):
    wav = wave.open(file_path, 'r')
    frames = wav.readframes(-1)
    frames = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    byteDepth = wav.getsampwidth()
    bitDepth = byteDepth * 8
    max_nb_bit = float(2 ** (bitDepth - 1))
    frames = frames / (max_nb_bit + 1.0)
    wav.close()
    return frames, frame_rate
def extract_frequencies(tune, duration=None, normalized=False):
    tune = wave.open(tune, 'r')
    try:
        frames = tune.readframes(-1) if not duration else tune.readframes(duration * SAMP_RATE)
        freqs = pylab.fromstring(frames, np.int16) / 1.0
        # Normalize frequencies
        if normalized:
            freqs = freqs / float(2 ** 15)
    finally:
        tune.close()
    return freqs
def get_wav_info(wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    number_of_channels = wav.getnchannels()
    sample_width = wav.getsampwidth()
    print len(sound_info)
    print frame_rate
    print number_of_channels
    print sample_width
    wav.close()
    # return sound_info[1000000:1160000], frame_rate
    return sound_info, frame_rate
def capture_frame(self, gray=True, tries=20, output_file=''):
    """This method requests a frame from the FPGA.

    :param bool gray: if True, a gray-scale image will be requested.
        If False, the requested image will be RGB.
    :param int tries: number of times that the system will try to obtain
        the requested image. After the last try, the system will exit.
    :param str output_file: URL name of the output file where the image
        will be stored. If left blank, the image won't be saved.
    :return: image contained in an array with dimensions specified in the
        configuration file. If gray is True, the dim value will be 1,
        and 3 for False (representing color images). dim equals the number
        of components per pixel.
    :rtype: MxNxdim numpy.array
    """
    # Request a frame capture from the socket client
    message = self._client.write_command('GET_NEW_FRAME', True)
    while message != "Image captured.\n":
        if not tries:
            logger.warn("Stop waiting for a frame after 20 tries")
            sys.exit()
        tries -= 1
        # Timeout error means that the FPGA buffer is empty. If this
        # happens, it will try to read the buffer again.
        try:
            message = self._client.recv(self._client.buffer_size)
        except socket.timeout:
            pass
    logger.debug(repr("'{}' after {} tries.".format(message, 20 - tries)))
    # Set dim (dimensions) to the number of components per pixel.
    if gray:
        command = 'GET_GRAY_IMAGE'
        dim = 1
        shape = (self._params['height'], self._params['width'])
    else:
        command = 'GET_COLOR_IMAGE'
        dim = 3
        shape = (self._params['height'], self._params['width'], dim)
    self._client.write_command(command)
    logger.debug("'{}' command sent.".format(command))
    # SIZE = Width x Height x Dimensions
    img_size = self._params['width'] * self._params['height'] * dim
    data = self._client.read_data(img_size)
    image = pylab.fromstring(data, dtype=pylab.uint8).reshape(shape)
    if output_file:
        misc.imsave(output_file, image)
    return image
def get_info(wav_file):
    '''
    Args:
        wav_file: path to the wav file
    Returns:
        The sound_info array and the frame rate.
    '''
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'Int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
def spec(FILE):
    """ """
    spf = wv.open(FILE, 'r')
    sound_info = spf.readframes(-1)
    sound_info = plb.fromstring(sound_info, 'Int16')
    f = spf.getframerate()
    plt.figure(num=None, figsize=(16, 7), dpi=80, edgecolor='k')
    p, freqs, t, im = plb.specgram(sound_info, Fs=f, scale_by_freq=True,
                                   sides='default')
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    font = {'size': 18}
    plt.rc('font', **font)
    plt.bbox_inches = 'tight'
    # plt.tight_layout()
    plt.show(block=False)
    return p, freqs, t, im
def joint_callback(self, msg):
    t = rospy.Time.now().to_sec()
    if self.t0 == 0:
        self.t0 = t
    if t - self.tjs > 0.1:
        # perform update
        self.tjs = t
        # append
        self.t.append(t - self.t0)
        for i, name in enumerate(msg.name):
            if name in self.names:
                j = self.names.index(name)
                self.data[j].append((msg.position[i] - self.qmin[j]) /
                                    (self.qmax[j] - self.qmin[j]))
        # clean
        for idx in range(len(self.t)):
            if self.t[-1] - self.t[idx] < 10:
                break
        self.t = self.t[idx:]
        for i in range(self.n):
            self.data[i] = self.data[i][idx:]
            self.line[i].set_data(self.t, self.data[i])
        self.line[i + 1].set_data([self.t[0], self.t[-1]], [0, 0])
        self.line[i + 2].set_data([self.t[0], self.t[-1]], [1, 1])
        # update time scrolling
        if len(self.t) > 10:
            self.ax.set_xlim(self.t[0], self.t[-1])
        self.canvas.draw()
        if self.pub:
            # publish plot as an image - used to do videos
            w, h = self.canvas.get_width_height()
            im = pl.fromstring(self.canvas.tostring_rgb(), dtype='uint8').reshape(h, w, 3)
            im_msg = CvBridge().cv2_to_imgmsg(im)
            self.pub.publish(im_msg)
def convert(infilename):
    infile = open(infilename, "r")
    contents = infile.read()
    infile.close()
    flat_data = pylab.fromstring(contents, "uint16")
    if len(flat_data) != image_width * image_height:
        print "data has length", len(flat_data), "which does not match", image_width, "*", image_height
        exit()
    data = []
    for j in range(image_height):
        data.append(flat_data[j * image_width:(j + 1) * image_width])
    data = pylab.array(data)
    header = pyfits.core.Header()
    header.update("SIMPLE", "T")
    header.update("BITPIX", 16)
    header.update("EXTEND", "T")
    pyfits.writeto(infilename.replace(".raw", ".fits"), data, header)
def getSound(self):
    print('self.gettingSound= ', self.gettingSound)
    if self.lowpass:
        FIR_COFF = FIR_COFF_HP
    # else:
    #     FIR_COFF = FIR_COFF_HP
    # global globalSound, globalSoundTime, globalGettingSound, iS
    spBufferSize = self.spBufferSize
    fftWindowSize = self.fftWindowSize
    Fir_output = []
    t0 = time.time()
    while (self.gettingSound is True):
        try:
            self.b = b = self.iS.read(spBufferSize)  # 1024
        except IOError:
            pass
        x = pl.fromstring(b, 'int16')
        x = x.astype('float32')
        if self.loopback:
            self.xBuf[self.frameI % self.frameN] = self.volume * x
        else:
            x *= scipy.signal.triang(len(x))
            self.xBuf[self.frameI % self.frameN] = x
        # del Fir_output[:]
        ### this is loopback line in/out function
        t = time.time() - t0  # sec
        self.t = t
        self.x = x
        self.frameI += 1
    print('self.gettingSound= ', self.gettingSound)
    self.iS.stop_stream()
def plot_waves(paths_to_tunes, begin=0, n_seconds=4):
    fig, axs = plt.subplots(len(paths_to_tunes), 1)
    for ax, tune in zip(axs.flat, paths_to_tunes):
        tail, track = os.path.split(tune)
        tail, dir1 = os.path.split(tail)
        _, dir2 = os.path.split(tail)
        tune = wave.open(tune, 'r')
        frames = tune.readframes(-1)
        max_frame = int(begin * SAMP_RATE) + int(n_seconds * SAMP_RATE)
        if max_frame > len(frames):
            print "Please select a shorter window"
            pass
        freqs = pylab.fromstring(frames, 'Int16')
        # Normalize frequencies (float divide so the int16 samples are not truncated in place)
        freqs = freqs / float(2 ** 15)
        tune.close()
        # Matplotlib does not handle non-ASCII characters too well so:
        ascii_title = ''.join(i for i in track[:-4] if ord(i) < 128)
        ascii_album = ''.join(i for i in dir1 if ord(i) < 128)
        ascii_artist = ''.join(i for i in dir2 if ord(i) < 128)
        time = np.arange(begin, begin + n_seconds, 1.0 / SAMP_RATE)
        ax.plot(time, freqs[int(begin * SAMP_RATE):max_frame])
        ax.set_title(ascii_artist + " - " + ascii_album + ":\n" + ascii_title,
                     fontsize=10)
        ax.tick_params(labelsize=8)
        ax.set_xlabel("Time (s)", fontsize=8)
        ax.set_ylabel("Amplitude (AU)", fontsize=8)
        ax.set_xlim([begin, begin + n_seconds])
        ax.set_ylim([-1, 1])
    fig.tight_layout()
    fig.savefig('waveforms.png')
def get(self, request, start=0, stop=-1):
    spf = wave.open(StringIO(self.sound), 'r')
    framerate = spf.getframerate()
    start = float(start)
    stop = float(stop)
    fig = figure()
    ax = fig.add_subplot(111, frameon=False)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(start * framerate,
                stop * framerate if stop != -1 else spf.getnframes())
    ax.plot(fromstring(spf.readframes(-1), 'Int16'), color='black')
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0)
    # fig.set_size_inches(1.78, 0.5)
    output = StringIO()
    fig.savefig(output, format='png')
    spf.close()
    return HTTPResponse(HTTPPreamble(headers=PNG_HEADERS.copy()),
                        body=output.getvalue())
def speech_detector(msg):
    global speech_frames_pub
    global speech_frames
    global cpt_silences
    global std_threshold
    global nb_silences_max
    nb_speech_frames_min = 0  # 1
    nb_frames_min = nb_speech_frames_min + nb_silences_max
    frame = msg.data
    frame_np = pylab.fromstring(frame, 'Int16')
    std = pylab.std(frame_np)
    nb_frames = len(speech_frames)
    if std > std_threshold:
        print 'speech?'
        speech_frames.append(frame)
        cpt_silences = nb_silences_max
        if nb_frames == 0:
            status_pub.publish('record')
    else:
        if cpt_silences == 0:
            if nb_frames > nb_frames_min:
                print 'speech detected in', nb_frames - nb_silences_max, 'chunks'
                speech_buffer = ''.join(speech_frames)
                speech_frames_pub.publish(speech_buffer)
                status_pub.publish('stop')
            elif nb_frames != 0:
                print 'nope...'
                status_pub.publish('cancel')
            speech_frames = []
        else:
            print 'remaining silences:', cpt_silences
            speech_frames.append(frame)
            cpt_silences -= 1
#! /usr/bin/env python
import sys, pylab

x = 5 ** 5 ** 5
t = '$5^{5^5}$'
l = pylab.fromstring(str(x), dtype='uint8') - 48  # Remove ASCII '0' offset
bin_var = pylab.arange(10)
bin_cnt = pylab.bincount(l, minlength=10).astype('double')
bin_cnt /= bin_cnt.sum()
bin_cnt -= bin_cnt.mean()
bin_srt = bin_cnt.argsort()
pylab.figure(1)
pylab.barh(bin_var, bin_cnt[bin_srt], align='center')
pylab.yticks(bin_var, bin_srt)
pylab.axis(ymin=-0.5, ymax=9.5)
pylab.axvline(0)
pylab.ylabel(r'Decimal Digits (sorted by frequency)')
pylab.xlabel(r'Mean Centred Normalised Frequency')
pylab.title(r'%s is %d decimal digits long' % (t, len(l)))
pylab.grid(True)
pylab.savefig('long.png')
pylab.show()
import tables, pylab

profile = tables.openFile('gyro_profile.h5')
n_field = profile.root.n_field.read()
n_kinetic = profile.root.n_kinetic.read()
profile.close()

fluxfile = open('gbflux.out')
fluxstr = fluxfile.read()
newfluxstr = fluxstr.replace('\n', ' ')
fluxarray = pylab.fromstring(newfluxstr, sep=' ')
fluxtensor = fluxarray.reshape([n_kinetic, n_field, 4, 457])
fluxfile.close()

pylab.plot(fluxtensor[0, 0, 1, :])
pylab.show()
def getSound(self):
    '''
    Capture audio and do real-time DSP: compute en, f0, and the FFT.
    self.gettingSound is used to stop the inner infinite loop.
    '''
    print('self.gettingSound= ', self.gettingSound)
    # global globalSound, globalSoundTime, globalGettingSound, iS
    spBufferSize = self.spBufferSize
    fftWindowSize = self.fftWindowSize
    t0 = time.time()
    while (self.gettingSound is True):
        # Set self.gettingSound = False to break out of this infinite loop.
        #
        # Capturing audio takes a single line; the result is a bytestring.
        self.b = b = self.iS.read(spBufferSize)  # 1024
        #
        # The real-time DSP starts here.
        #
        # bytestring --> int16 --> float32
        x = pl.fromstring(b, 'int16')
        x = x.astype('float32')
        #
        # Save the fresh waveform right away so the original audio can be replayed.
        self.xBuf[self.frameI % self.frameN] = x
        #
        # --> zeroMean (better not to do zeroMean here, since there is only 1 frame)
        # mu = x.mean()
        # x -= mu
        #
        # Time elapsed since recording started, in seconds.
        t = time.time() - t0  # sec
        #
        # hamming window
        x *= scipy.signal.hamming(len(x))
        #
        # Energy: the mean of the squared signal.
        en = (x * x).mean()
        #
        # Complex spectrum of the signal; only half of the frequency bins are kept.
        xFFT = pl.fft(x)[0:spBufferSize / 2]  # for x be real, a half range of fft is enough
        #
        # Power spectrum (non-negative real values); as above, only half of the bins are kept.
        xP = pl.absolute(xFFT * xFFT.conj())  # Power spectrum
        sumXp = xP.sum()
        self.t = t        # time elapsed since recording started, in seconds
        self.x = x        # current speech samples (1 frame)
        self.en = en      # current speech energy
        self.fft = xFFT   # current complex FFT spectrum
        #
        # Power spectra (non-negative reals) and waveforms of the last .frameN
        # frames, counted back from the current frame .frameI.
        self.specgram[self.frameI % self.frameN] = xP
        #
        # The speech stored at this point has already been distorted by the
        # Hamming window!! It has to be saved earlier.
        # self.xBuf[self.frameI % self.frameN] = x
        #
        # Advance the current frame index .frameI.
        self.frameI += 1
        #
        # A simple fundamental-frequency (f0) extraction algorithm.
        startF0 = spBufferSize / 16
        self.f0 = startF0 + xP[startF0:spBufferSize / 4].argmax()  # 0 < f0 < spBufferSize/2
        # frequency bins, 0 <= k < spBufferSize/2
        k = pl.arange(spBufferSize / 2)
        # mean frequency
        self.fm = (xP * k).sum() / sumXp
        # frequency variance
        self.fv = (xP * k ** 2).sum() / sumXp - self.fm ** 2
        # frequency standard deviation
        self.fs = self.fv ** 0.5
        # spectral entropy
        self.entropy = -(xP * pl.log(xP / sumXp)).sum() / sumXp
        #
        # Normalize f0 (0 < f0 < 0.5).
        # Note that the expression below caps f0 at 0.5:
        # f0 = 1 would correspond to the sampling frequency (Fs) = 16000 Hz in the default case,
        # so f0 = 0.5 corresponds to a real f0 of 0.5 * Fs = 8000 Hz in the default case.
        self.f0 /= self.spBufferSize
        #
        # fm can be normalized in the same way.
        self.fm /= self.spBufferSize
        #
        # fv and fs are treated the same way, but their meaning is less clear and
        # they have not been used; left here to be studied in detail later.
        self.fv /= self.spBufferSize  # this is not much meaningful
        self.fs /= self.spBufferSize
        #
        # All-pass, low-pass, and high-pass averages of the power spectrum.
        self.enP = xP.mean()
        self.enPL = xP[0:spBufferSize / 16].mean()
        self.enPH = xP[spBufferSize / 16:].mean()
        #
        # The simple DSP stops here; more elaborate features such as formants,
        # MFCC, and a more accurate f0 require consulting the references.
    print('self.gettingSound= ', self.gettingSound)
    self.iS.stop_stream()
import pylab
from matplotlib.backends.backend_agg import FigureCanvasAgg

try:
    from PIL import Image
except ImportError:
    raise SystemExit("PIL must be installed to run this example")

pylab.plot([1, 2, 3])

canvas = pylab.get_current_fig_manager().canvas

agg = canvas.switch_backends(FigureCanvasAgg)
agg.draw()
s = agg.tostring_rgb()

# get the width and the height to resize the matrix
l, b, w, h = agg.figure.bbox.bounds
w, h = int(w), int(h)

X = pylab.fromstring(s, pylab.uint8)
X.shape = h, w, 3

im = Image.fromstring("RGB", (w, h), s)

# Uncomment this line to display the image using ImageMagick's
# `display` tool.
# im.show()
def load_raw_data(filename):
    infile = open(filename, "r")
    data = pylab.fromstring(infile.read(), "uint16")
    infile.close()
    return data
if __name__ == '__main__':
    speech_frames_pub = rospy.Publisher('speech_frames', std_msgs.msg.String,
                                        queue_size=10)
    status_pub = rospy.Publisher('speech_status', std_msgs.msg.String)
    rospy.init_node('speech_detector', anonymous=True)
    rospy.Subscriber('micro_frames', std_msgs.msg.String, speech_detector)
    std_threshold = int(rospy.get_param('~threshold', '500'))
    nb_silences_max = int(rospy.get_param('~silences', '4'))
    print 'threshold:', std_threshold
    print 'silences: ', nb_silences_max
    print
    while not rospy.is_shutdown():
        figure = pylab.figure(1)
        speech_frames_copy = list(speech_frames)  # prevent from modification
        if speech_frames_copy != []:
            figure.clf()
            data = pylab.fromstring(''.join(speech_frames_copy), 'Int16')
            pylab.plot(data)
            pylab.ylim([-20000, 20000])
            pylab.xlim([0, len(data)])
            figure.show()
        pylab.pause(0.000001)