예제 #1
0
def time_labels_interval(wf: wave.Wave_read, seconds, points=None):
    """Build an array of time labels (in seconds) for the samples of *wf*.

    If *seconds* is a truthy ``(start, end)`` pair, the labels run from
    start to end at the file's frame rate; a truthy *points* additionally
    trims the result to its centered *points*-element slice.  Otherwise the
    labels span the whole file, with *points* samples (defaulting to the
    file's frame count).
    """
    if not seconds:
        # Whole-file labels: one per frame unless the caller asked otherwise.
        count = wf.getnframes() if points is None else points
        return np.linspace(0, wf.getnframes() / wf.getframerate(), num=count)

    labels = np.arange(seconds[0], seconds[1], 1. / wf.getframerate())
    if not points:
        return labels
    # Keep only the centered window of `points` labels.
    lo = int((len(labels) - points) / 2)
    return labels[lo:lo + points]
예제 #2
0
def play_audio(wf: wave.Wave_read):
    """Stream the entire wave file to the default output device via PyAudio."""
    frames_per_buffer = 1024

    # set up PyAudio
    player = pyaudio.PyAudio()

    # output stream matching the file's sample width, channel count and rate
    stream = player.open(format=player.get_format_from_width(wf.getsampwidth()),
                         channels=wf.getnchannels(),
                         rate=wf.getframerate(),
                         output=True)

    # push chunks until readframes() returns an empty bytes object (EOF)
    for chunk in iter(lambda: wf.readframes(frames_per_buffer), b''):
        stream.write(chunk)

    stream.stop_stream()
    stream.close()

    player.terminate()
예제 #3
0
파일: peaks.py 프로젝트: tomas-jezek/Python
def fourier(audio: wave.Wave_read) -> Tuple[Optional[int], Optional[int]]:
    """Fourier analysis of the input audio, returning the (lowest, highest)
    frequency bin that ever shows a peak, or (None, None) when no window
    contains a peak.

    The signal is cut into 1-second windows; within each window a bin is a
    "peak" when its rFFT magnitude is at least 20x the window's average
    magnitude (threshold taken from the assignment).

    BUG FIX: the original used truthiness (`if not low`, `any((low, high))`)
    to test whether a peak had been seen, which treated bin index 0 (the DC
    bin) as "no peak found" and wrongly returned (None, None) for signals
    whose only peak is bin 0.  Explicit `is None` checks fix this.
    """
    length = audio.getnframes()
    sample_rate = audio.getframerate()
    windows_count = length // sample_rate          # whole 1-second windows
    channels = 1 if audio.getnchannels() == 1 else 2  # stereo (2) vs. mono (1)
    frames = sample_rate * windows_count

    data = np.array(unpack(f"{channels * frames}h", audio.readframes(frames)))
    if channels == 2:
        data = merge_channels(data)

    low, high = None, None
    for i in range(windows_count):
        start = i * sample_rate
        window = data[start:start + sample_rate]
        amplitudes = np.abs(np.fft.rfft(window))
        # peak definition from the assignment; hoisted out of the inner loop
        threshold = 20 * np.average(amplitudes)

        for j, amplitude in enumerate(amplitudes):
            if amplitude < threshold:
                continue
            if low is None:
                low = j
            high = j
    if low is None:
        return None, None
    # A later window may peak at a lower bin than the first one seen,
    # so the pair can arrive swapped.
    return (high, low) if high < low else (low, high)
예제 #4
0
def filter_lowpass(wav: Wave_read, cutoff: int):
    """Clamp every 16-bit sample whose magnitude is below *cutoff* and return
    a reader for the filtered copy written to <AUDIO_DIR>/temp.wav.

    BUG FIX: the original read `wav.data`, an attribute wave.Wave_read does
    not have — the frames are now read with readframes().  It also queried
    the reader's parameters after closing it; the parameters are captured
    before close() now.  `np.fromstring` (removed in modern NumPy) is
    replaced by `np.frombuffer`.

    NOTE(review): quiet samples are replaced with the constant 10, not 0,
    matching the original behaviour — confirm this is intentional.
    """
    raw = wav.readframes(wav.getnframes())
    signal = np.frombuffer(raw, dtype=np.int16)

    framerate = wav.getframerate()
    sampwidth = wav.getsampwidth()
    nchannels = wav.getnchannels()
    wav.close()

    # int() before abs() avoids int16 overflow on -32768
    frames = [10 if abs(int(sample)) < cutoff else int(sample)
              for sample in signal]

    filtered: wave.Wave_write = wave.open(join(const.AUDIO_DIR, 'temp.wav'),
                                          'w')
    filtered.setframerate(framerate)
    filtered.setsampwidth(sampwidth)
    filtered.setnchannels(nchannels)
    for frame in frames:
        data = struct.pack('<h', frame)
        filtered.writeframesraw(data)
    filtered.close()
    return wave.open(join(const.AUDIO_DIR, 'temp.wav'), 'r')
예제 #5
0
 def __get_wav_stats(self, audio: wave.Wave_read):
     """Collect the basic stream parameters of *audio* into a dict."""
     stats = dict(
         waveform=audio,
         frameRate=audio.getframerate(),
         nChannels=audio.getnchannels(),
         sampWidth=audio.getsampwidth(),
     )
     return stats
예제 #6
0
def get_bitrate(wave_obj: wave.Wave_read):
    """Return the uncompressed PCM data rate in kilobytes per second.

    Computed as frame rate x channel count x bytes per sample / 1000.
    """
    return (wave_obj.getframerate()
            * wave_obj.getnchannels()
            * wave_obj.getsampwidth()) / 1000
예제 #7
0
def filter_lowpassTest(wav: Wave_read, cutoff: int):
    """Test variant of filter_lowpass: clamp quiet samples and return a
    reader for the filtered copy written to <AUDIO_DIR>/temp.wav.

    BUG FIX: the original iterated over an undefined name `frames` (the
    filtering loop was missing entirely) — the same clamp used by
    filter_lowpass is reinstated here.  `np.fromstring` (removed in modern
    NumPy) is replaced by `np.frombuffer`.
    """
    raw = wav.readframes(-1)
    signal = np.frombuffer(raw, dtype=np.int16)

    # Same low-amplitude clamp as filter_lowpass; int() before abs()
    # avoids int16 overflow on -32768.
    frames = [10 if abs(int(sample)) < cutoff else int(sample)
              for sample in signal]

    filtered: wave.Wave_write = wave.open(join(const.AUDIO_DIR, 'temp.wav'),
                                          'w')
    filtered.setframerate(wav.getframerate())
    filtered.setsampwidth(wav.getsampwidth())
    filtered.setnchannels(wav.getnchannels())
    for frame in frames:
        data = struct.pack('<h', frame)
        filtered.writeframesraw(data)
    filtered.close()
    return wave.open(join(const.AUDIO_DIR, 'temp.wav'), 'r')
예제 #8
0
def encode_audio(wav: wave.Wave_read) -> bytes:
    """Encode the PCM audio in *wav* with the external `enclib` codec and
    return the codec file header followed by the concatenated encoded frames.

    NOTE(review): depends on module-level names defined elsewhere in this
    project: `enclib` (a ctypes-loaded library), `FLOATARRAY_TYPE`,
    `CHUNK_SIZE`, `iter_wav_data`, `get_file_header` and `hexlify`.  The
    codec communicates through DLL globals (`gl_*`), so call order matters.
    """
    print('audio_encode_init {} {}'.format(wav.getframerate(), wav.getframerate() // 50))
    # Initialise the codec for this file's sample rate.
    enclib.audio_encode_init(c_int(wav.getframerate()))

    # Encoded frame size (in 16-bit words) chosen by the codec during init.
    words_per_frame = c_int.in_dll(enclib, 'gl_number_of_16bit_words_per_frame').value
    in_data = FLOATARRAY_TYPE()
    data = bytearray()
    nn = 0  # number of chunks encoded (debugging aid)
    #print(FLOATARRAY_TYPE.from_buffer_copy)
    for n, c in enumerate(iter_wav_data(wav, CHUNK_SIZE, CHUNK_SIZE)):
        # Split each 16-bit sample into little-endian byte pairs for the codec.
        for i, s in enumerate(c):
            in_data[i*2] = s & 0xff
            in_data[i*2+1] = s >> 8
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        if n == 0:
            print('gl_history={}'.format(hexlify(gl_history)))
        result = enclib.audio_encode(in_data)
        # Pull the encoder's output and internal state back out of the DLL.
        gl_out_words = (c_uint8 * (words_per_frame * 2)).in_dll(enclib, 'gl_out_words')
        gl_mlt_coefs = (c_uint8 * 640).in_dll(enclib, 'gl_mlt_coefs')
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        gl_mag_shift = c_int.in_dll(enclib, 'gl_mag_shift').value
        #print('gl_mag_shift={}'.format(gl_mag_shift))
        #if nn < 2:
            #print('gl_mlt_coefs={}'.format(hexlify(gl_mlt_coefs)))
            #print('gl_history={}'.format(hexlify(gl_history)))
            #print("in_data: len={} {}".format(len(in_data), hexlify(in_data)))
            #print("out_data: len={} {}".format(len(gl_out_words), hexlify(gl_out_words)))
        data.extend(gl_out_words[:])
        nn += 1
    #print('nn: {}'.format(nn))
    # Frame count as tracked by the codec itself.
    nframes = c_int.in_dll(enclib, 'gl_frame_cnt').value

    print('nframes: {} words_per_frame: {}'.format(nframes, words_per_frame))
    header = get_file_header(sample_rate=wav.getframerate(), frames = nframes, words_per_frame = words_per_frame)
    print('data len: {}'.format(len(data)))
    return header + data
def print_audio_samples(wave_read: wave.Wave_read,
                        pos_sec=0,
                        steps=1,
                        length_ms=2_000):
    """Print the first byte of every `steps`-th frame in a window of the file.

    The window starts at `pos_sec` seconds and lasts `length_ms`
    milliseconds; the values are printed as one comma-separated line.

    BUG FIX: the original called `wave_read.readframes(start_frame)` up
    front to skip ahead, but `setpos(i)` below overrides the position every
    iteration, so that read only wasted work — it is removed.

    NOTE(review): for multi-byte sample widths `peak[0]` is only the least
    significant byte of the first sample — kept as-is to preserve output.
    """
    rate = wave_read.getframerate()
    start_frame = rate * pos_sec
    end_frame = start_frame + (rate * length_ms // 1000)
    print("Reading from = %s to = %s, with step = %s" %
          (start_frame, end_frame, steps))
    string_buffer = []
    for i in range(start_frame, end_frame, steps):
        # setpos() makes the single-frame read below start at frame i
        wave_read.setpos(i)
        peak = wave_read.readframes(1)
        string_buffer.append(str(peak[0]))
    print(','.join(string_buffer))
예제 #10
0
def time_labels(wave_file: wave.Wave_read, points=None):
    """Return evenly spaced timestamps spanning the file's full duration.

    Produces `points` labels, defaulting to one per audio frame.
    """
    duration = wave_file.getnframes() / wave_file.getframerate()
    count = wave_file.getnframes() if points is None else points
    return np.linspace(0, duration, num=count)
예제 #11
0
def main(args):
    """Detect the speech region of every *.wav file in the current directory
    and write one "<name> <speech_start_sample> <speech_end_sample>
    <last_sample_of_file>" line per file to dataset.txt.

    NOTE(review): relies on names defined elsewhere in this module:
    `read_wave`, `frame_generator`, `vad_collector`, `write_wave`, and the
    module-level `stm`/`etm` lists that vad_collector appends to.

    BUG FIX: the original never closed the wave reader it opened per file;
    the reader is now closed as soon as its parameters have been read.
    """
    # summary line for each voice file, written to dataset.txt at the end
    info = []

    # input files, sorted for a deterministic processing order
    dir_files = glob.glob("*.wav")
    dir_files.sort()

    end_sample_file = []      # last sample index of each file
    start_sample_speech = []  # first speech sample of each file
    end_sample_speech = []    # last speech sample of each file
    fileName = []             # file names with the extension stripped
    st = []                   # speech start times (ms) per file
    et = []                   # speech end times (ms) per file
    sample_rates = []         # sample rate of each file

    for n in dir_files:
        # read frame count and sample rate, then release the file handle
        vc = wave.open(n)
        end_sample_file.append(vc.getnframes())
        sample_rates.append(vc.getframerate())
        vc.close()

        # read_wave returns the raw audio and its sample rate
        audio, sample_rate = read_wave(n)

        # aggressiveness 3 = most aggressive speech/non-speech classification
        vad = webrtcvad.Vad(3)
        # 10 ms analysis windows
        frames = list(frame_generator(10, audio, sample_rate))

        # main pass: collect the voiced segments of the file
        segments = vad_collector(sample_rate, 30, 300, vad, frames)

        # save the voiced parts to a new file next to the original
        for segment in segments:
            path = 'edited_'+n
            write_wave(path, segment, sample_rate)

        # original name without the extension
        fileName.append(n.split('.')[0])

        # vad_collector appends candidate speech times to the module-level
        # stm/etm lists; the last entry is the reliable one (earlier entries
        # may be noise — see vad_collector for details).
        st.append(stm[-1])
        print('start time (ms) of speech ',n,' is',st[-1])
        et.append(etm[-1])
        print('end time (ms) of speech ',n,' is',et[-1])

    # convert the collected speech start/end times to sample indices
    for t, rate in zip(st, sample_rates):
        start_sample_speech.append(t * rate)
    for t, rate in zip(et, sample_rates):
        end_sample_speech.append(t * rate)

    # assemble one summary line per file
    for i in range(len(fileName)):
        info.append(fileName[i]+' '+str(int(start_sample_speech[i]))+' '+str(int(end_sample_speech[i]))+' '+str(end_sample_file[i]))

    # write the dataset description
    with open('dataset.txt','w') as f:
        for line in info:
            f.write(line+'\n')
예제 #12
0
 def __samples_to_millis(wav_file: Wave_read, samples: int) -> int:
     """Convert a sample count to whole milliseconds at the file's frame rate."""
     seconds = samples / wav_file.getframerate()
     return int(seconds * 1000)