def fourier(audio: wave.Wave_read) -> Tuple[Optional[int], Optional[int]]:
    """Run a windowed Fourier analysis and return the (lowest, highest) peak bin.

    The recording is cut into consecutive windows of ``sample_rate`` samples
    (one second each), so bin ``j`` of a window's real FFT corresponds to
    ``j`` Hz. A bin is a peak when its amplitude is at least 20 times the
    average amplitude of its window. A trailing partial second is ignored.

    Args:
        audio: Open WAV reader. Samples are unpacked as 16-bit signed
            integers; two-channel input is reduced with ``merge_channels``.

    Returns:
        ``(lowest, highest)`` peak bin over all windows, or ``(None, None)``
        when no window contains a peak.
    """
    # data
    length = audio.getnframes()
    sample_rate = audio.getframerate()
    windows_count = length // sample_rate
    channels = 1 if audio.getnchannels() == 1 else 2  # Stereo (2) vs. Mono (1)
    frames = sample_rate * windows_count
    data = np.array(unpack(f"{channels * frames}h", audio.readframes(frames)))
    if channels == 2:
        data = merge_channels(data)

    # amplitudes
    low, high = None, None
    for i in range(windows_count):
        window = data[i * sample_rate:(i + 1) * sample_rate]
        amplitudes = np.abs(np.fft.rfft(window))
        threshold = 20 * np.average(amplitudes)  # peak criterion from the assignment

        peaks = np.flatnonzero(amplitudes >= threshold)
        if peaks.size == 0:
            continue

        # Track the true minimum/maximum across windows. Compare against None
        # explicitly because bin 0 is a legitimate peak position.
        first, last = int(peaks[0]), int(peaks[-1])
        low = first if low is None else min(low, first)
        high = last if high is None else max(high, last)

    return low, high
def filter_lowpass(wav: Wave_read, cutoff: int):
    """Write an amplitude-gated copy of ``wav`` to ``temp.wav`` and reopen it.

    Despite the name, no frequency filtering happens here: every 16-bit
    sample whose absolute value is below ``cutoff`` is replaced by the
    constant 10, and all other samples are copied unchanged.

    Args:
        wav: Source reader. It is closed by this function.
        cutoff: Amplitude threshold; samples strictly below it are replaced.

    Returns:
        A reader opened on ``temp.wav`` inside ``const.AUDIO_DIR``.
    """
    # The original read ``wav.data``, which a plain wave.Wave_read does not
    # provide. Honour the attribute when present, otherwise read the frames.
    raw = getattr(wav, "data", None)
    if raw is None:
        raw = wav.readframes(wav.getnframes())
    samples = np.frombuffer(raw, dtype=np.int16)

    # Widen before abs() so that -32768 does not overflow back to itself.
    quiet = np.abs(samples.astype(np.int32)) < cutoff
    gated = np.where(quiet, np.int16(10), samples).astype(np.int16)

    framerate = wav.getframerate()
    sampwidth = wav.getsampwidth()
    nchannels = wav.getnchannels()
    wav.close()

    target = join(const.AUDIO_DIR, 'temp.wav')
    # The context manager finalises the header and closes the file even if
    # writing fails.
    with wave.open(target, 'w') as filtered:
        filtered.setframerate(framerate)
        filtered.setsampwidth(sampwidth)
        filtered.setnchannels(nchannels)
        filtered.writeframes(gated.tobytes())
    return wave.open(target, 'r')
def play_audio(wf: wave.Wave_read):
    """Play the remaining frames of ``wf`` through a PyAudio output stream.

    Frames are read in chunks of 1024 and written to the stream until the
    reader returns no more data. The stream and the PyAudio instance are
    released even when playback fails part-way through.

    Args:
        wf: Open WAV reader; its sample width, channel count and frame rate
            configure the output stream.
    """
    CHUNK = 1024

    # instantiate PyAudio (1)
    p = pyaudio.PyAudio()
    try:
        # open stream (2)
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)
        try:
            # play stream (3)
            data = wf.readframes(CHUNK)
            while len(data) > 0:
                stream.write(data)
                data = wf.readframes(CHUNK)
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        p.terminate()
def __get_wav_stats(self, audio: wave.Wave_read):
    """Bundle a WAV reader with its basic format parameters.

    Returns:
        A dict with the keys ``"waveform"`` (the reader itself),
        ``"frameRate"``, ``"nChannels"`` and ``"sampWidth"``.
    """
    stats = {"waveform": audio}
    stats["frameRate"] = audio.getframerate()
    stats["nChannels"] = audio.getnchannels()
    stats["sampWidth"] = audio.getsampwidth()
    return stats
def get_bitrate(wave_obj: wave.Wave_read):
    """Return the uncompressed bitrate of ``wave_obj`` in kilobits per second.

    Computed as ``frame rate * channels * bits per sample / 1000``.

    Args:
        wave_obj: Open WAV reader.

    Returns:
        The bitrate as a float (e.g. 1411.2 for 44.1 kHz, 16-bit stereo).
    """
    framerate = wave_obj.getframerate()
    num_channels = wave_obj.getnchannels()
    # getsampwidth() reports bytes per sample; a bitrate needs bits.
    bits_per_sample = wave_obj.getsampwidth() * 8
    return (framerate * num_channels * bits_per_sample) / 1000
def remove_data(fileWav, gap):
    """Print basic statistics of a WAV file and plot its waveform.

    Args:
        fileWav: Base name (without the ``.wav`` suffix) of a file located
            under ``file_wav_dir``.
        gap: Not used by the current implementation.
    """
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    path = file_wav_dir + fileWav + ".wav"

    # Opening through Wave_read still rejects files the wave module cannot
    # parse; the handle is released right away instead of being leaked.
    wav_file = Wave_read(path)
    wav_file.close()

    # NOTE(review): ``read`` is presumably scipy.io.wavfile.read - confirm.
    sample_rate, wav_data = read(path)
    print(wav_data.dtype)
    print(wav_data.min(), wav_data.max())
    plt.plot(wav_data)
    plt.show()
def join(self, inputWavfile: wave.Wave_read, start, end):
    """Append the ``[start, end)`` slice of ``inputWavfile`` to the output.

    ``start`` and ``end`` are multiplied by ``self.frameRate`` to obtain
    frame positions.

    Raises:
        ValueError: If ``start``, ``end`` or ``end - start`` is negative, or
            if the file's parameters are rejected by the parameter check.
    """
    length = end - start
    if any(value < 0 for value in (start, end, length)):
        raise ValueError("Invalid start value was given")

    if not self.__compareParams(inputWavfile.getparams()):
        raise ValueError("File can not be joined due to inappropriate parameters")

    first_frame = int(start * self.frameRate)
    frame_count = int(length * self.frameRate)
    inputWavfile.setpos(first_frame)
    self.__output.writeframes(inputWavfile.readframes(frame_count))
def read(file: wave.Wave_read):
    """Read all frames of ``file`` and wrap them in an ``AudioData``.

    Args:
        file: Open WAV reader.

    Returns:
        ``AudioData`` built from the reader's parameters and the frames as
        split by ``split_frames_into_sounds``.
    """
    params = file.getparams()
    frames_number = file.getnframes()
    frames = file.readframes(frames_number)
    # Take the frame size from the header instead of dividing the byte count
    # by the frame count: that division raised ZeroDivisionError on an empty
    # file and gave a wrong size whenever fewer bytes were read than declared.
    characters_per_frame = file.getsampwidth() * file.getnchannels()
    framesdata = split_frames_into_sounds(frames, characters_per_frame)
    return AudioData(params, framesdata)
def print_audio_samples_all(wave_read: wave.Wave_read):
    """Print every 100th frame value and the number of all-zero frames.

    Each frame is read on its own and interpreted as one unsigned
    little-endian integer. The list of sampled values is printed first, then
    the count of frames whose value is 0.
    """
    total_frames = wave_read.getnframes()
    sampled = []
    zero_frames = 0
    for frame_index in range(total_frames):
        value = int.from_bytes(wave_read.readframes(1), byteorder='little')
        if value == 0:
            zero_frames += 1
        if frame_index % 100 != 0:
            continue
        # if value > (1 << 15): value = (1 << 15) - value
        sampled.append(value)
    print(sampled)
    print(zero_frames)
def filter_lowpassTest(wav: Wave_read, cutoff: int):
    """Copy the remaining 16-bit samples of ``wav`` into ``temp.wav`` and reopen it.

    This variant writes the samples through unchanged; ``cutoff`` is accepted
    but not applied.

    Args:
        wav: Source reader. It is left open.
        cutoff: Unused.

    Returns:
        A reader opened on ``temp.wav`` inside ``const.AUDIO_DIR``.
    """
    # The original looped over an undefined name ``frames``; the decoded
    # samples are what gets written.
    signal = np.frombuffer(wav.readframes(-1), dtype=np.int16)

    target = join(const.AUDIO_DIR, 'temp.wav')
    # The context manager finalises the header and closes the file even if
    # writing fails.
    with wave.open(target, 'w') as filtered:
        filtered.setframerate(wav.getframerate())
        filtered.setsampwidth(wav.getsampwidth())
        filtered.setnchannels(wav.getnchannels())
        filtered.writeframes(signal.tobytes())
    return wave.open(target, 'r')
def time_labels_interval(wf: wave.Wave_read, seconds, points=None):
    """Build an array of time labels for a WAV reader.

    With a truthy ``seconds`` pair, labels run from ``seconds[0]`` to
    ``seconds[1]`` in steps of one sample period; a truthy ``points`` then
    selects that many labels from the middle. Without ``seconds``, ``points``
    labels (default: the frame count) are spread evenly over the duration of
    the file.
    """
    if not seconds:
        if points is None:
            points = wf.getnframes()
        duration = wf.getnframes() / wf.getframerate()
        return np.linspace(0, duration, num=points)

    labels = np.arange(seconds[0], seconds[1], 1. / wf.getframerate())
    if not points:
        return labels

    start = int((len(labels) - points) / 2)
    return labels[start:start + points]
def iter_wav_data(wav: wave.Wave_read, chunk_size: int, min_padding=0):
    """Yield the samples of ``wav`` in blocks of ``chunk_size`` frames.

    The reader is rewound first. Only ``getnframes() // chunk_size`` whole
    chunks are produced; a trailing partial chunk is not yielded. Each chunk
    is an ``array.array('h')``. If a read returns fewer bytes than a full
    chunk, the chunk is zero-padded to full length.

    Args:
        wav: Open WAV reader.
        chunk_size: Number of frames per chunk.
        min_padding: When non-zero, one extra chunk of this many zero samples
            is yielded at the end.

    Yields:
        ``array.array('h')`` chunks.
    """
    wav.rewind()
    nchunks = wav.getnframes() // chunk_size
    # readframes() returns bytes, so the expected length must be counted in
    # bytes; the original compared it against the frame count and padded to
    # the wrong size.
    chunk_bytes = chunk_size * wav.getsampwidth() * wav.getnchannels()
    for _ in range(nchunks):
        d = wav.readframes(chunk_size)
        if len(d) < chunk_bytes:
            d += b'\0' * (chunk_bytes - len(d))
        a = array.array('h')
        a.frombytes(d)
        yield a
    if min_padding:
        a = array.array('h')
        a.frombytes(b'\0\0' * min_padding)
        yield a
def trim(sound_file: wave.Wave_read, ratio, new_file_path):
    """Create a new, trimmed WAV file from the given one.

    Reads ``int(frame_count * ratio)`` frames from the reader's current
    position and writes them to ``new_file_path`` with the source parameters.

    :param sound_file: Source file
    :param ratio: The ratio by which the function trims
    :param new_file_path: Path to the output file
    """
    frame_count = sound_file.getnframes()
    target_frame_count = int(frame_count * ratio)
    new_frames = sound_file.readframes(target_frame_count)

    # The context manager closes the output (and patches its header) even if
    # writing the frames raises.
    with wave.open(new_file_path, 'w') as new_file:
        new_file.setparams(sound_file.getparams())
        new_file.writeframes(new_frames)
def readAudioFile(fileWav):
    """Compute MFCC features for a WAV file, plot them and print delta values.

    Args:
        fileWav: Base name (without the ``.wav`` suffix) of a file located
            under ``file_wav_dir``.
    """
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    path = file_wav_dir + fileWav + ".wav"

    # Opening through Wave_read still rejects files the wave module cannot
    # parse; the handle is released right away instead of being leaked.
    wav_file = Wave_read(path)
    wav_file.close()

    # NOTE(review): ``read`` is presumably scipy.io.wavfile.read - confirm.
    sample_rate, wav_data = read(path)
    mfcc_feat, mspec, spec = mfcc(wav_data, fs=sample_rate)
    print(mfcc_feat.shape)

    plt.imshow(mfcc_feat.T, aspect='auto')
    plt.colorbar()
    plt.show()

    mfcc_feat = np.transpose(mfcc_feat)
    print(mfcc_feat[0, :].shape)
    v1 = deltas_calc(mfcc_feat[0, :])
    print(v1)
def transform_nparray(orignal_wave: wave.Wave_read) -> Tuple[np.ndarray, int]:
    """Decode the frames of a WAV reader into an int16 array.

    Parameters
    ----------
    orignal_wave : wave.Wave_read
        Open reader to pull the frames from.

    Returns
    -------
    narray : ndarray
        1-d ``int16`` view of the raw frame bytes.
    narray_frame : int
        Number of frames reported by the reader.
    """
    frame_count = orignal_wave.getnframes()
    raw_frames = orignal_wave.readframes(frame_count)
    return np.frombuffer(raw_frames, dtype="int16"), frame_count
def _readAudioFile(self, fileWav):
    """Load a WAV file and store normalised MFCC features on the instance.

    Sets ``nframes``, ``duration``, ``mfcc_feature``, ``factor_mfcc``,
    ``velocity_mfcc`` and ``acceleration_mfcc``.

    Args:
        fileWav: WAV file passed both to ``Wave_read`` and to ``read``.
    """
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    wav_file = Wave_read(fileWav)
    try:
        self.nframes = wav_file.getnframes()
    finally:
        # Only the frame count is needed; do not leak the reader.
        wav_file.close()

    sample_rate, wav_data = read(fileWav)
    self.duration = self.nframes / float(sample_rate)

    # winlen = length of articulatory frames
    winlen = round(self.duration / self.position.shape[1], 6)
    # TODO: window length (2 * winlen) and window step (winlen) still need to
    # be settled.
    mfcc_feat = mfcc(wav_data, sample_rate, 2 * winlen, winlen)
    mfcc_feat = np.transpose(mfcc_feat)

    self.mfcc_feature = mfcc_feat[1:13]
    self.factor_mfcc = abs(self.mfcc_feature).max()
    self.mfcc_feature = self.mfcc_feature / self.factor_mfcc  # normalize in [-1,1]

    veloc, accel = self._get_velocity_acceleration(self.mfcc_feature)
    self.velocity_mfcc = veloc
    self.acceleration_mfcc = accel
def encode_audio(wav: wave.Wave_read) -> bytes:
    """Encode ``wav`` with the native encoder library and return header + payload.

    The encoder is initialised with the file's frame rate, then fed one chunk
    at a time (chunks come from ``iter_wav_data`` with ``CHUNK_SIZE`` frames
    plus one padding chunk). After every ``audio_encode`` call the library
    global ``gl_out_words`` is appended to the output. Progress and debug
    information is printed to stdout.

    Args:
        wav: Open WAV reader.

    Returns:
        The result of ``get_file_header(...)`` concatenated with the
        collected encoder output.
    """
    print('audio_encode_init {} {}'.format(wav.getframerate(), wav.getframerate() // 50))
    enclib.audio_encode_init(c_int(wav.getframerate()))
    # Size of one encoded frame, as published by the library after init.
    words_per_frame = c_int.in_dll(enclib, 'gl_number_of_16bit_words_per_frame').value
    in_data = FLOATARRAY_TYPE()
    data = bytearray()
    nn = 0  # number of chunks encoded so far (only used by the debug output below)
    #print(FLOATARRAY_TYPE.from_buffer_copy)
    for n, c in enumerate(iter_wav_data(wav, CHUNK_SIZE, CHUNK_SIZE)):
        # Split every 16-bit sample into its low and high byte, low byte first.
        for i, s in enumerate(c):
            in_data[i*2] = s & 0xff
            in_data[i*2+1] = s >> 8
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        if n == 0:
            # Dump the encoder history once, before the first chunk is encoded.
            print('gl_history={}'.format(hexlify(gl_history)))
        result = enclib.audio_encode(in_data)
        # Library globals after the call; only gl_out_words is consumed, the
        # others are looked up for the commented-out debug prints.
        gl_out_words = (c_uint8 * (words_per_frame * 2)).in_dll(enclib, 'gl_out_words')
        gl_mlt_coefs = (c_uint8 * 640).in_dll(enclib, 'gl_mlt_coefs')
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        gl_mag_shift = c_int.in_dll(enclib, 'gl_mag_shift').value
        #print('gl_mag_shift={}'.format(gl_mag_shift))
        #if nn < 2:
        #print('gl_mlt_coefs={}'.format(hexlify(gl_mlt_coefs)))
        #print('gl_history={}'.format(hexlify(gl_history)))
        #print("in_data: len={} {}".format(len(in_data), hexlify(in_data)))
        #print("out_data: len={} {}".format(len(gl_out_words), hexlify(gl_out_words)))
        # Copy the bytes out ([:]) before the next call touches the global again.
        data.extend(gl_out_words[:])
        nn += 1
    #print('nn: {}'.format(nn))
    # Frame count as tracked by the library itself.
    nframes = c_int.in_dll(enclib, 'gl_frame_cnt').value
    print('nframes: {} words_per_frame: {}'.format(nframes, words_per_frame))
    header = get_file_header(sample_rate=wav.getframerate(), frames = nframes, words_per_frame = words_per_frame)
    print('data len: {}'.format(len(data)))
    return header + data
def encode_chunk(self, thread_id: str, file: Wave_read, total_samples_to_read: int, output: BytesIO) -> None:
    """Pipe frames from ``file`` through ``self.command`` and collect its stdout.

    Frames are written to the child's stdin in steps determined by
    ``self.update_samples_to_read``; the child's stdout is read on a helper
    thread and written to ``output``. Progress (0-100) is reported through
    ``self.listener.encode_update``. ``file`` is closed at the end.

    Args:
        thread_id: Identifier passed through to the listener.
        file: Open WAV reader providing the frames.
        total_samples_to_read: Total number of samples to feed to the child.
        output: Buffer receiving everything the child writes to stdout.
    """
    # Start the child with its console window hidden.
    options = STARTUPINFO()
    options.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    options.wShowWindow = subprocess.SW_HIDE
    # NOTE(review): stderr is piped but never read - if the child writes a lot
    # to stderr this can presumably stall; confirm.
    process = Popen(self.command, stdin=PIPE, stdout=PIPE, stderr=PIPE, startupinfo=options)
    # Drain stdout concurrently while stdin is being fed below.
    read_data_thread = Thread(
        target=lambda: output.write(process.stdout.read()))
    read_data_thread.daemon = True
    read_data_thread.start()
    samples_to_read, samples_left = self.update_samples_to_read(
        total_samples_to_read, 1024)
    last_progress = 0
    while samples_left > 0:
        process.stdin.write(file.readframes(samples_to_read))
        progress = int((total_samples_to_read - samples_left) * 100 / total_samples_to_read)
        # Notify the listener only when the integer percentage changes.
        if progress != last_progress:
            self.listener.encode_update(thread_id, progress)
            last_progress = progress
        samples_to_read, samples_left = self.update_samples_to_read(
            samples_left, 1024)
    self.listener.encode_update(thread_id, 100)
    # Closing stdin signals end of input; then wait for the reader thread to
    # finish collecting stdout.
    process.stdin.close()
    read_data_thread.join()
    process.stdout.close()
    process.stderr.close()
    # NOTE(review): the child is never wait()ed on here - confirm it is reaped elsewhere.
    file.close()
def print_audio_samples(wave_read: wave.Wave_read, pos_sec=0, steps=1, length_ms=2_000):
    """Print the first byte of selected frames as a comma-separated line.

    Frames are taken from ``pos_sec`` seconds into the file, every ``steps``
    frames, for ``length_ms`` milliseconds. The range is clamped to the end
    of the file. A header line with the range actually read is printed first.

    Args:
        wave_read: Open WAV reader.
        pos_sec: Start position in seconds.
        steps: Stride between printed frames.
        length_ms: Length of the window in milliseconds.
    """
    rate = wave_read.getframerate()
    start_frame = rate * pos_sec
    requested_end = start_frame + (rate * length_ms // 1000)
    # Do not run past the last frame: reading there returned an empty frame
    # and crashed on peak[0] (or setpos raised) for files shorter than the window.
    end_frame = min(requested_end, wave_read.getnframes())
    print("Reading from = %s to = %s, with step = %s" % (start_frame, end_frame, steps))

    string_buffer = []
    for i in range(start_frame, end_frame, steps):
        # setpos() positions the reader explicitly, so no preliminary
        # skip-read is needed.
        wave_read.setpos(i)
        peak = wave_read.readframes(1)
        string_buffer.append(str(peak[0]))
    print(','.join(string_buffer))
def _send_packet(self, wave_file: wave.Wave_read, first_packet: bool, transport) -> int:
    """Read one packet's worth of frames, wrap them and send them.

    Returns the number of frames sent, or 0 when the file is exhausted or the
    transport is closing.
    """
    frames = wave_file.readframes(FRAMES_PER_PACKET)
    if not frames:
        return 0

    context = self.context
    marker = 0xE0 if first_packet else 0x60
    header = AudioPacketHeader.encode(
        0x80,
        marker,
        context.rtpseq,
        context.rtptime,
        context.session_id,
    )

    # ALAC frame with raw data. Not so pretty but will work for now until a
    # proper ALAC encoder is added.
    audio = bitarray("00" + str(context.channels - 1) + 19 * "0" + "1")
    # Append the payload two bytes at a time with each pair swapped.
    for offset in range(0, len(frames), 2):
        audio.frombytes(bytes([frames[offset + 1], frames[offset]]))

    if transport.is_closing():
        _LOGGER.warning("Connection closed while streaming audio")
        return 0

    packet = header + audio.tobytes()

    # Add packet to backlog before sending
    self._packet_backlog[context.rtpseq] = packet
    transport.sendto(packet)

    context.rtpseq = (context.rtpseq + 1) % (2**16)
    frames_sent = int(len(frames) / (context.channels * context.bytes_per_channel))
    context.head_ts += frames_sent
    return frames_sent
def __init__(self,filename):
    """Initialise the instance by delegating to ``Wave_read.__init__`` with ``filename``."""
    Wave_read.__init__(self,filename)
def time_labels(wave_file: wave.Wave_read, points=None):
    """Return evenly spaced time labels covering the whole file.

    Produces ``points`` values (default: one per frame) from 0 to
    ``frames / frame rate`` inclusive.
    """
    frame_total = wave_file.getnframes()
    if points is None:
        points = frame_total
    duration = frame_total / wave_file.getframerate()
    return np.linspace(0, duration, num=points)
def __samples_to_millis(wav_file: Wave_read, samples: int) -> int:
    """Convert a sample count to whole milliseconds at the file's frame rate.

    The fractional part is dropped by ``int()``.
    """
    seconds = samples / wav_file.getframerate()
    return int(seconds * 1000)
def main(args):
    """Run voice-activity detection on every ``*.wav`` file in the working directory.

    For each file the detected speech segments are written to
    ``edited_<name>``, and one line per file
    (``<name> <speech start sample> <speech end sample> <file end sample>``)
    is written to ``dataset.txt``.

    Args:
        args: Not used by this function.
    """
    # Per-file lines (file name, speech start sample, speech end sample,
    # last sample of the file) that are written to dataset.txt.
    info = []
    # Input files.
    dir_files = glob.glob("*.wav")
    # Sort the input (not necessary).
    dir_files.sort()
    # Frame count of every file.
    end_sample_file = []
    # Start sample of the speech in every file.
    start_sample_speech = []
    # End sample of the speech in every file.
    end_sample_speech = []
    # Original file names with the extension cut off (not necessary).
    fileName = []
    # Start time (ms) of the speech in every file.
    st = []
    # End time (ms) of the speech in every file.
    et = []
    # Sample rate of every file.
    sample_rates = []
    # Loop counter.
    count = 0
    # Loop over the directory.
    for n in dir_files:
        # Open the voice file.
        vc=wave.open(n)
        # Record the frame count of the file.
        end_sample_file.append(Wave_read.getnframes(vc))
        # Record the sample rate of the file.
        sample_rates.append(Wave_read.getframerate(vc))
        # read_wave takes the file path and returns the audio data together
        # with its sample rate.
        audio, sample_rate = read_wave(n)
        # webrtcvad detector; the integer argument (0-3) sets its aggressiveness.
        vad = webrtcvad.Vad(3)
        # Generate frames (the first argument is the window size).
        frames = frame_generator(10, audio, sample_rate)
        frames = list(frames)
        # Main step: detect the speech segments in the file.
        segments = vad_collector(sample_rate, 30, 300, vad, frames)
        # Write the voiced segments to a new file; every segment goes to the
        # same path.
        for i, segment in enumerate(segments):
            path = 'edited_'+n
            write_wave(path, segment, sample_rate)
        # Split the file name from its extension (not necessary).
        temp_str=n.split('.')
        fileName.append(temp_str[0])
        # Start time (ms) of the speech in this file.
        # NOTE(review): stm and etm are not defined in this function -
        # presumably module-level lists filled by vad_collector; confirm.
        st.append(stm[-1])
        print('start time (ms) of speech ',n,' is',st[-1])
        # End time (ms) of the speech in this file.
        et.append(etm[-1])
        print('end time (ms) of speech ',n,' is',et[-1])
        # Note: stm and etm, used by vad_collector, hold the start and end
        # times found in the file. Because of noise some entries may be noise
        # times rather than speech times, but the last entry is always a
        # speech time. See vad_collector for details.
        count = count+1
    # Convert every speech start time to a sample index.
    # NOTE(review): st/et are described as milliseconds yet are multiplied
    # directly by the sample rate - confirm the units.
    for i in range(0,len(st)):
        start_sample_speech.append(st[i]*sample_rates[i])
    # Convert every speech end time to a sample index.
    for i in range(0,len(et)):
        end_sample_speech.append(et[i]*sample_rates[i])
    # Build the per-file information lines.
    for i in range(0,len(fileName)):
        info.append(fileName[i]+' '+str(int(start_sample_speech[i]))+' '+str(int(end_sample_speech[i]))+' '+str(end_sample_file[i]))
    # Write the information to the file.
    f = open('dataset.txt','w')
    for n in info:
        f.write(n+'\n')
    f.close()