def time_labels_interval(wf: wave.Wave_read, seconds, points=None):
    """Build time-axis labels for a wave file.

    With `seconds=(start, end)`, labels are spaced one frame period apart
    over that interval; if `points` is also given, the centered slice of
    that many labels is returned.  Without `seconds`, labels span the whole
    file (`points` entries, defaulting to one per frame).
    """
    rate = wf.getframerate()
    if not seconds:
        count = wf.getnframes() if points is None else points
        return np.linspace(0, wf.getnframes() / rate, num=count)
    labels = np.arange(seconds[0], seconds[1], 1. / rate)
    if not points:
        return labels
    # take the centered window of `points` labels
    start = int((len(labels) - points) / 2)
    return labels[start:start + points]
def play_audio(wf: wave.Wave_read):
    """Play *wf* on the default output device via PyAudio (blocking).

    BUG FIX: the stream and PyAudio instance are now released in a
    `finally` block, so the audio device is not leaked when playback is
    interrupted (e.g. KeyboardInterrupt or a write error).
    """
    CHUNK = 1024  # frames per write

    # instantiate PyAudio (1)
    p = pyaudio.PyAudio()
    # open stream (2)
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
    try:
        # play stream (3): readframes returns b'' at end of file
        data = wf.readframes(CHUNK)
        while data:
            stream.write(data)
            data = wf.readframes(CHUNK)
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()
def fourier(audio: wave.Wave_read) -> Tuple[Optional[int], Optional[int]]:
    """Fourier analysis of the input data, returning the (lowest, highest)
    peak frequency bin, or (None, None) when no peak exists.

    A bin is a peak when its amplitude is at least 20x the window average
    (per the assignment).  Analysis runs over whole 1-second windows.

    BUG FIX: the original used truthiness (`if not low`, `any((low, high))`)
    to test "not yet set"; bin 0 (DC) is falsy, so a peak at bin 0 was
    repeatedly overwritten and an all-DC result was reported as no peak.
    `is None` is the correct sentinel test.
    """
    length = audio.getnframes()
    sample_rate = audio.getframerate()
    windows_count = length // sample_rate
    channels = 1 if audio.getnchannels() == 1 else 2  # Stereo (2) vs. Mono (1)
    frames = sample_rate * windows_count
    data = np.array(unpack(f"{channels * frames}h", audio.readframes(frames)))
    if channels == 2:
        data = merge_channels(data)

    # amplitudes
    low, high = None, None
    for i in range(windows_count):
        window = data[i * sample_rate:(i + 1) * sample_rate]
        amplitudes = np.abs(np.fft.rfft(window))
        threshold = 20 * np.average(amplitudes)  # peak criterion from the assignment
        for j, amplitude in enumerate(amplitudes):
            if amplitude < threshold:
                continue
            if low is None:
                low = j
            high = j

    if low is None:
        return None, None
    return (high, low) if high < low else (low, high)  # order may be swapped
def filter_lowpass(wav: Wave_read, cutoff: int):
    """Flatten every sample whose magnitude is below *cutoff* to 10, write
    the result to temp.wav and return a Wave_read over it.

    BUG FIXES:
    - `wav.data` is not an attribute of Wave_read (AttributeError); read
      the whole stream with `readframes(-1)` as filter_lowpassTest does.
    - `np.fromstring` is removed in modern numpy; use `np.frombuffer`.
    - the wave parameters are captured before `wav.close()` instead of
      being queried on a closed reader.
    Assumes 16-bit samples — TODO confirm against sampwidth.
    """
    signal = np.frombuffer(wav.readframes(-1), dtype=np.int16)
    framerate = wav.getframerate()
    sampwidth = wav.getsampwidth()
    nchannels = wav.getnchannels()
    wav.close()

    # Sub-cutoff samples become 10 (not 0) — preserving the original's
    # choice; presumably a small nonzero floor was intended.
    filtered_signal = np.where(np.abs(signal) < cutoff, 10, signal).astype(np.int16)

    filtered: wave.Wave_write = wave.open(join(const.AUDIO_DIR, 'temp.wav'), 'w')
    filtered.setframerate(framerate)
    filtered.setsampwidth(sampwidth)
    filtered.setnchannels(nchannels)
    # one batched write instead of a per-sample struct.pack loop
    filtered.writeframes(filtered_signal.tobytes())
    filtered.close()
    return wave.open(join(const.AUDIO_DIR, 'temp.wav'), 'r')
def __get_wav_stats(self, audio: wave.Wave_read):
    """Collect the basic properties of *audio* into a stats dict."""
    stats = {"waveform": audio}
    stats["frameRate"] = audio.getframerate()
    stats["nChannels"] = audio.getnchannels()
    stats["sampWidth"] = audio.getsampwidth()
    return stats
def get_bitrate(wave_obj: wave.Wave_read):
    """Return the bitrate of *wave_obj* in kilobits per second.

    BUG FIX: `getsampwidth()` is in *bytes* per sample, so the product
    framerate * channels * width is bytes/second; the original divided by
    1000 and returned kilobytes/s under the name "bitrate".  Multiply by
    8 to get bits before converting to kbps.
    """
    framerate = wave_obj.getframerate()
    num_channels = wave_obj.getnchannels()
    sample_width = wave_obj.getsampwidth()  # bytes per sample
    bitrate = (framerate * num_channels * sample_width * 8) / 1000
    return bitrate
def filter_lowpassTest(wav: Wave_read, cutoff: int):
    """Test variant of filter_lowpass: write the filtered signal to
    temp.wav and return a Wave_read over it.

    BUG FIX: the original referenced `frames` without ever building it
    (guaranteed NameError).  The cutoff loop is restored, consistent with
    filter_lowpass: samples with magnitude below *cutoff* become 10.
    Also replaces the removed `np.fromstring` with `np.frombuffer`.
    """
    signal = np.frombuffer(wav.readframes(-1), dtype=np.int16)
    # same flattening rule as filter_lowpass
    frames = [10 if abs(int(sample)) < cutoff else int(sample) for sample in signal]

    filtered: wave.Wave_write = wave.open(join(const.AUDIO_DIR, 'temp.wav'), 'w')
    filtered.setframerate(wav.getframerate())
    filtered.setsampwidth(wav.getsampwidth())
    filtered.setnchannels(wav.getnchannels())
    for frame in frames:
        data = struct.pack('<h', frame)
        filtered.writeframesraw(data)
    filtered.close()
    return wave.open(join(const.AUDIO_DIR, 'temp.wav'), 'r')
def encode_audio(wav: wave.Wave_read) -> bytes:
    """Encode the PCM frames of *wav* with the ctypes-wrapped `enclib`
    encoder and return the encoded stream prefixed with a file header.

    NOTE(review): depends on module-level names defined elsewhere —
    enclib, FLOATARRAY_TYPE, CHUNK_SIZE, iter_wav_data, get_file_header,
    hexlify.  The encoder communicates through library globals read via
    ctypes `.in_dll`, so the order of calls and reads below matters.
    """
    print('audio_encode_init {} {}'.format(wav.getframerate(), wav.getframerate() // 50))
    # initialise the encoder for this sample rate
    enclib.audio_encode_init(c_int(wav.getframerate()))
    # per-frame output size, exported as a library global
    words_per_frame = c_int.in_dll(enclib, 'gl_number_of_16bit_words_per_frame').value
    in_data = FLOATARRAY_TYPE()
    data = bytearray()  # accumulates the encoded output
    nn = 0  # frames encoded so far (debug counter)
    #print(FLOATARRAY_TYPE.from_buffer_copy)
    for n, c in enumerate(iter_wav_data(wav, CHUNK_SIZE, CHUNK_SIZE)):
        # split each 16-bit sample into little-endian byte pairs in the
        # encoder's input buffer
        for i, s in enumerate(c):
            in_data[i*2] = s & 0xff
            in_data[i*2+1] = s >> 8
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        if n == 0:
            # dump the initial encoder state once for debugging
            print('gl_history={}'.format(hexlify(gl_history)))
        result = enclib.audio_encode(in_data)
        # snapshot the encoder's global output/state buffers after the call
        gl_out_words = (c_uint8 * (words_per_frame * 2)).in_dll(enclib, 'gl_out_words')
        gl_mlt_coefs = (c_uint8 * 640).in_dll(enclib, 'gl_mlt_coefs')
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        gl_mag_shift = c_int.in_dll(enclib, 'gl_mag_shift').value
        #print('gl_mag_shift={}'.format(gl_mag_shift))
        #if nn < 2:
        #print('gl_mlt_coefs={}'.format(hexlify(gl_mlt_coefs)))
        #print('gl_history={}'.format(hexlify(gl_history)))
        #print("in_data: len={} {}".format(len(in_data), hexlify(in_data)))
        #print("out_data: len={} {}".format(len(gl_out_words), hexlify(gl_out_words)))
        # append this frame's encoded words to the output
        data.extend(gl_out_words[:])
        nn += 1
        #print('nn: {}'.format(nn))
    # total frames encoded, reported by the library
    nframes = c_int.in_dll(enclib, 'gl_frame_cnt').value
    print('nframes: {} words_per_frame: {}'.format(nframes, words_per_frame))
    header = get_file_header(sample_rate=wav.getframerate(), frames = nframes, words_per_frame = words_per_frame)
    print('data len: {}'.format(len(data)))
    return header + data
def print_audio_samples(wave_read: wave.Wave_read, pos_sec=0, steps=1, length_ms=2_000):
    """Print a range header and then the first byte of every `steps`-th
    frame between `pos_sec` and `pos_sec + length_ms`, comma-separated.

    FIX: the original called `wave_read.readframes(start_frame)` up front,
    reading and discarding `start_frame` frames only for `setpos(i)` inside
    the loop to override the position anyway — pure wasted I/O.  Output is
    unchanged.
    """
    rate = wave_read.getframerate()
    start_frame = rate * pos_sec
    end_frame = start_frame + (rate * length_ms // 1000)
    print("Reading from = %s to = %s, with step = %s" % (start_frame, end_frame, steps))
    string_buffer = []
    for i in range(start_frame, end_frame, steps):
        wave_read.setpos(i)
        peak = wave_read.readframes(1)
        # peak[0] is the low byte of the frame (little-endian samples)
        string_buffer.append(str(peak[0]))
    print(','.join(string_buffer))
def time_labels(wave_file: wave.Wave_read, points=None):
    """Return `points` evenly spaced time labels (seconds) spanning the
    whole file; defaults to one label per frame."""
    total_frames = wave_file.getnframes()
    if points is None:
        points = total_frames
    duration = total_frames / wave_file.getframerate()
    return np.linspace(0, duration, num=points)
def main(args):
    """Scan *.wav files in the current directory, detect the speech region
    in each with webrtcvad, write a trimmed copy ('edited_<name>'), and
    append one '<name> <speech start> <speech end> <file end>' (sample
    indices) line per file to dataset.txt.

    NOTE(review): `args` is unused.  `stm` and `etm` are not defined in
    this function — presumably module-level lists filled by vad_collector;
    confirm.  Also `st`/`et` are described as milliseconds but are
    multiplied by the sample rate directly (ms * rate is samples only if
    the values were seconds) — verify the units against vad_collector.
    """
    # per-file info lines (name + speech start/end sample + file end sample)
    # that get written to dataset.txt
    info = []
    # every .wav file in the working directory
    dir_files = glob.glob("*.wav")
    # sorting is not required, but gives a deterministic order
    dir_files.sort()
    # last sample index of each file
    end_sample_file = []
    # first sample of detected speech per file
    start_sample_speech = []
    # last sample of detected speech per file
    end_sample_speech = []
    # original file names with the extension stripped (not necessary)
    fileName = []
    # speech start times (ms) per file
    st = []
    # speech end times (ms) per file
    et = []
    # sample rate of each file
    sample_rates = []
    # loop counter
    count = 0
    # loop over the directory
    for n in dir_files:
        # open the voice file
        vc=wave.open(n)
        # unbound-method call style; equivalent to vc.getnframes()
        end_sample_file.append(Wave_read.getnframes(vc))
        sample_rates.append(Wave_read.getframerate(vc))
        # read_wave returns the raw audio plus its sample rate
        audio, sample_rate = read_wave(n)
        # aggressiveness 3 = most aggressive speech/non-speech classification
        vad = webrtcvad.Vad(3)
        # generate frames (first parameter is the window size in ms)
        frames = frame_generator(10, audio, sample_rate)
        frames = list(frames)
        # main step: recognize speech in the voice file
        segments = vad_collector(sample_rate, 30, 300, vad, frames)
        # write the voiced part(s) of the original into a new 'edited_' file
        for i, segment in enumerate(segments):
            path = 'edited_'+n
            write_wave(path, segment, sample_rate)
        # split the file name from its extension (not necessary)
        temp_str=n.split('.')
        fileName.append(temp_str[0])
        # NOTE: stm/etm may contain noise-triggered times, but their last
        # element is always a real speech time — see vad_collector
        st.append(stm[-1])
        print('start time (ms) of speech ',n,' is',st[-1])
        et.append(etm[-1])
        print('end time (ms) of speech ',n,' is',et[-1])
        count = count+1
    # convert speech start times to sample indices
    for i in range(0,len(st)):
        start_sample_speech.append(st[i]*sample_rates[i])
    # convert speech end times to sample indices
    for i in range(0,len(et)):
        end_sample_speech.append(et[i]*sample_rates[i])
    # assemble one info line per file
    for i in range(0,len(fileName)):
        info.append(fileName[i]+' '+str(int(start_sample_speech[i]))+' '+str(int(end_sample_speech[i]))+' '+str(end_sample_file[i]))
    # write the collected lines to dataset.txt
    f = open('dataset.txt','w')
    for n in info:
        f.write(n+'\n')
    f.close()
def __samples_to_millis(wav_file: Wave_read, samples: int) -> int:
    """Convert a sample count into whole milliseconds at *wav_file*'s rate."""
    seconds = samples / wav_file.getframerate()
    return int(seconds * 1000)