示例#1
0
def fourier(audio: wave.Wave_read) -> Tuple[Optional[int], Optional[int]]:
    """Fourier analysis of the input audio, returning its (lowest, highest) peak frequency.

    The recording is cut into consecutive windows of ``sample_rate`` frames
    (one second each); a trailing partial second is ignored.  Each window is
    transformed with a real FFT, and a bin counts as a peak when its
    amplitude is at least 20 times the mean amplitude of that window.
    Because a window spans exactly one second, bin ``j`` corresponds to
    ``j`` Hz (assuming ``merge_channels`` returns one sample per frame for
    stereo input).

    Args:
        audio: Open WAV reader holding signed 16-bit samples, mono or stereo.

    Returns:
        ``(lowest, highest)`` peak bin over all windows, or ``(None, None)``
        when no window contains a peak.
    """
    # data
    sample_rate = audio.getframerate()
    windows_count = audio.getnframes() // sample_rate
    channels = 1 if audio.getnchannels() == 1 else 2  # Stereo (2) vs. Mono (1)
    frames = sample_rate * windows_count

    data = np.array(unpack(f"{channels * frames}h", audio.readframes(frames)))
    if channels == 2:
        data = merge_channels(data)

    # amplitudes
    low, high = None, None
    for i in range(windows_count):
        window = data[i * sample_rate:(i + 1) * sample_rate]
        amplitudes = np.abs(np.fft.rfft(window))
        average = np.average(amplitudes)
        if average == 0:
            # A silent window has no peaks; without this guard every bin
            # would satisfy ``0 >= 20 * 0``.
            continue

        # peaks: threshold of 20x the window average comes from the assignment
        peaks = np.flatnonzero(amplitudes >= 20 * average)
        if peaks.size == 0:
            continue

        # Compare against None explicitly: bin 0 is a valid peak but falsy.
        first, last = int(peaks[0]), int(peaks[-1])
        low = first if low is None else min(low, first)
        high = last if high is None else max(high, last)
    return low, high
示例#2
0
def filter_lowpass(wav: Wave_read, cutoff: int):
    """Gate quiet samples of ``wav`` and return a reader over the rewritten audio.

    Despite the name this works on amplitude, not frequency: every 16-bit
    sample whose absolute value is below ``cutoff`` is replaced by the
    constant 10, and all other samples are kept unchanged.  The result is
    written to ``temp.wav`` inside ``const.AUDIO_DIR`` using the frame rate,
    sample width and channel count reported by ``wav``.

    Args:
        wav: Source audio.  Besides the ``Wave_read`` getters it must expose
            the raw sample bytes as ``wav.data``; it is closed by this call.
        cutoff: Amplitude threshold below which a sample is replaced.

    Returns:
        A ``Wave_read`` opened on the freshly written ``temp.wav``.

    Raises:
        ValueError: If ``wav.data`` is not a whole number of 16-bit samples.
    """
    # np.fromstring(..., "Int16") is rejected by current NumPy releases;
    # frombuffer decodes the same bytes without copying.
    samples = np.frombuffer(wav.data, dtype=np.int16)

    # Widen before abs(): abs(int16(-32768)) wraps back to -32768 and would
    # wrongly count the loudest negative sample as "quiet".
    quiet = np.abs(samples.astype(np.int32)) < cutoff
    # NOTE(review): the replacement value 10 (rather than 0) is taken over
    # from the original implementation - confirm it is intentional.
    gated = np.where(quiet, 10, samples).astype('<i2')

    framerate = wav.getframerate()
    sampwidth = wav.getsampwidth()
    nchannels = wav.getnchannels()
    wav.close()

    path = join(const.AUDIO_DIR, 'temp.wav')
    # The context manager closes the writer (and patches the header) even if
    # writing fails part-way.
    with wave.open(path, 'w') as filtered:
        filtered.setframerate(framerate)
        filtered.setsampwidth(sampwidth)
        filtered.setnchannels(nchannels)
        filtered.writeframes(gated.tobytes())
    return wave.open(path, 'r')
示例#3
0
def play_audio(wf: wave.Wave_read):
    """Play ``wf`` from its current position to the end on the default output.

    The audio is pushed to a PyAudio output stream in blocks of 1024 frames,
    so the call blocks until playback has finished.  The stream and the
    PyAudio instance are released even when reading or writing fails.

    Args:
        wf: Open WAV reader; its sample width, channel count and frame rate
            configure the output stream.  It is not rewound or closed here.
    """
    CHUNK = 1024

    # instantiate PyAudio (1)
    p = pyaudio.PyAudio()
    try:
        # open stream (2)
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)
        try:
            # play stream (3): readframes() returns b'' once the data is exhausted
            for data in iter(lambda: wf.readframes(CHUNK), b''):
                stream.write(data)
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        p.terminate()
示例#4
0
 def __get_wav_stats(self, audio: wave.Wave_read):
     """Describe ``audio`` as a dict of the reader itself plus its basic format.

     Keys: ``waveform`` (the reader object passed in), ``frameRate``,
     ``nChannels`` and ``sampWidth`` (each taken from the matching getter).
     """
     stats = {"waveform": audio}
     stats["frameRate"] = audio.getframerate()
     stats["nChannels"] = audio.getnchannels()
     stats["sampWidth"] = audio.getsampwidth()
     return stats
示例#5
0
def get_bitrate(wave_obj:wave.Wave_read):
    """Return frame rate x channel count x sample width, divided by 1000.

    NOTE(review): ``getsampwidth()`` reports the sample width in bytes, so
    the value is thousands of bytes per second.  If kilobits per second is
    what callers expect, a factor of 8 is missing - confirm before changing.
    """
    bytes_per_second = (
        wave_obj.getframerate()
        * wave_obj.getnchannels()
        * wave_obj.getsampwidth()
    )
    return bytes_per_second / 1000
def remove_data(fileWav, gap):
    """Load ``file_wav_dir + fileWav + ".wav"``, print sample statistics and plot it.

    Prints the dtype of the decoded samples, then their minimum and maximum
    on one line, and shows a plot of the waveform (``plt.show()`` blocks
    until the window is closed).

    NOTE(review): ``gap``, ``nframes``, ``sample_rate`` and ``start`` are not
    used by the visible body, which looks unfinished; they are kept so a
    later continuation still finds them.

    Args:
        fileWav: File name without directory and without the ``.wav`` suffix.
        gap: Currently unused.
    """
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    path = file_wav_dir + fileWav + ".wav"
    wav_file = Wave_read(path)
    try:
        nframes = wav_file.getnframes()
    finally:
        # The reader is only needed for the frame count; do not leak the handle.
        wav_file.close()
    sample_rate, wav_data = read(path)
    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print(wav_data.dtype)
    print("%s %s" % (wav_data.min(), wav_data.max()))
    plt.plot(wav_data)
    plt.show()
    start = 0
 def join(self, inputWavfile: wave.Wave_read, start, end):
     """Append the span from ``start`` to ``end`` of ``inputWavfile`` to the output.

     ``start`` and ``end`` are multiplied by ``self.frameRate`` to obtain
     frame positions (presumably they are given in seconds).

     Raises:
         ValueError: If ``start``, ``end`` or ``end - start`` is negative, or
             if the file's parameters are rejected by the parameter check.
     """
     span = end - start
     if any(value < 0 for value in (start, end, span)):
         raise ValueError("Invalid start value was given")

     if not self.__compareParams(inputWavfile.getparams()):
         raise ValueError("File can not be joined due to inappropriate parameters")

     # Seek to the first requested frame, then copy the requested number of frames.
     inputWavfile.setpos(int(start * self.frameRate))
     chunk = inputWavfile.readframes(int(span * self.frameRate))
     self.__output.writeframes(chunk)
示例#8
0
 def read(file: wave.Wave_read):
     """
     Reads every frame of the file and produces an audiodata from its data
     Returns that audiodata

     The size of one frame is taken from the header (sample width times
     channel count) instead of being derived from the amount of data read,
     so an empty file no longer raises ZeroDivisionError and a truncated
     file is still split on real frame boundaries.
     """
     params = file.getparams()
     frames = file.readframes(file.getnframes())
     # One frame holds one sample of `sampwidth` bytes for every channel.
     characters_per_frame = file.getsampwidth() * file.getnchannels()
     framesdata = split_frames_into_sounds(frames, characters_per_frame)
     return AudioData(params, framesdata)
def print_audio_samples_all(wave_read: wave.Wave_read):
    """Print every 100th frame value, then the number of all-zero frames.

    Frames are read one at a time from the reader's current position and
    decoded as unsigned little-endian integers.  Two lines are printed: the
    list of values at frame indices 0, 100, 200, ... and the count of frames
    whose value is 0.
    """
    sampled = []
    zero_frames = 0
    for index in range(wave_read.getnframes()):
        value = int.from_bytes(wave_read.readframes(1), byteorder='little')
        zero_frames += value == 0
        if not index % 100:
            sampled.append(value)
    print(sampled)
    print(zero_frames)
示例#10
0
def filter_lowpassTest(wav: Wave_read, cutoff: int):
    """Gate quiet samples read from ``wav`` and return a reader over the result.

    All remaining frames of ``wav`` are read and decoded as 16-bit samples.
    Samples whose absolute value is below ``cutoff`` are replaced by the
    constant 10; the rest are kept.  The output is written to ``temp.wav``
    inside ``const.AUDIO_DIR`` with the frame rate, sample width and channel
    count of ``wav``.

    NOTE(review): the previous body looped over an undefined name ``frames``
    and never used ``cutoff``, so it could not run.  The gate applied here
    mirrors ``filter_lowpass`` - confirm that this is the intended filter.

    Args:
        wav: Source audio; it is read to the end but not closed.
        cutoff: Amplitude threshold below which a sample is replaced.

    Returns:
        A ``Wave_read`` opened on the freshly written ``temp.wav``.
    """
    # np.fromstring(..., "Int16") is rejected by current NumPy releases.
    samples = np.frombuffer(wav.readframes(-1), dtype=np.int16)

    # Widen before abs() so that -32768 does not wrap around.
    quiet = np.abs(samples.astype(np.int32)) < cutoff
    gated = np.where(quiet, 10, samples).astype('<i2')

    path = join(const.AUDIO_DIR, 'temp.wav')
    with wave.open(path, 'w') as filtered:
        filtered.setframerate(wav.getframerate())
        filtered.setsampwidth(wav.getsampwidth())
        filtered.setnchannels(wav.getnchannels())
        filtered.writeframes(gated.tobytes())
    return wave.open(path, 'r')
示例#11
0
def time_labels_interval(wf: wave.Wave_read, seconds, points=None):
    """Build time-axis labels (in seconds) for ``wf``.

    With a truthy ``seconds`` pair the labels run from ``seconds[0]`` up to
    (not including) ``seconds[1]`` in steps of one frame period; a truthy
    ``points`` then keeps only the ``points`` labels centred in that range.
    Without ``seconds`` the labels are ``points`` evenly spaced values
    covering the whole file (one per frame when ``points`` is None).
    """
    if not seconds:
        frame_total = wf.getnframes()
        count = frame_total if points is None else points
        return np.linspace(0, frame_total / wf.getframerate(), num=count)

    labels = np.arange(seconds[0], seconds[1], 1. / wf.getframerate())
    if not points:
        return labels

    # Keep the window of `points` labels centred in the requested interval.
    first = int((len(labels) - points) / 2)
    return labels[first:first + points]
示例#12
0
def iter_wav_data(wav: wave.Wave_read, chunk_size: int, min_padding=0):
    """Yield the audio of ``wav`` as ``array('h')`` chunks of ``chunk_size`` frames.

    The reader is rewound first.  Only whole chunks are produced:
    ``getnframes() // chunk_size`` of them, so a trailing partial chunk
    announced by the header is not yielded.  If the file delivers fewer bytes
    than a full chunk (for example because it is truncated), the chunk is
    zero-padded to its full size.

    NOTE(review): dropping the trailing partial chunk is existing behaviour
    and is kept; confirm it is intended.

    Args:
        wav: Open WAV reader; samples are interpreted as signed 16-bit values.
        chunk_size: Number of frames per yielded chunk.
        min_padding: When non-zero, one extra chunk of this many zero samples
            is yielded after the audio.

    Yields:
        ``array.array('h')`` holding the samples of one chunk.
    """
    wav.rewind()
    # readframes() returns bytes, so the short-read check must be made in
    # bytes: frames x bytes per sample x channels.
    chunk_bytes = chunk_size * wav.getsampwidth() * wav.getnchannels()
    for _ in range(wav.getnframes() // chunk_size):
        raw = wav.readframes(chunk_size)
        if len(raw) < chunk_bytes:
            raw += b'\0' * (chunk_bytes - len(raw))
        samples = array.array('h')
        samples.frombytes(raw)
        yield samples
    if min_padding:
        silence = array.array('h')
        silence.frombytes(b'\0\0' * min_padding)
        yield silence
示例#13
0
def trim(sound_file: wave.Wave_read, ratio, new_file_path):
    """
    Creates a new trimmed file out of the given one

    The first ``int(frame_count * ratio)`` frames are read from the current
    position of ``sound_file`` and written to ``new_file_path`` with the same
    parameters as the source.  The output file is closed even if writing
    fails.

    :param sound_file: Source file
    :param ratio: The ratio by which the function trims
    :param new_file_path: Path to the output file
    """
    target_frame_count = int(sound_file.getnframes() * ratio)
    new_frames = sound_file.readframes(target_frame_count)

    # setparams() copies the source frame count; writeframes() corrects the
    # header to the number of frames actually written.
    with wave.open(new_file_path, 'w') as new_file:
        new_file.setparams(sound_file.getparams())
        new_file.writeframes(new_frames)
def readAudioFile(fileWav):
    """Compute and display the MFCC features of ``file_wav_dir + fileWav + ".wav"``.

    Prints the shape of the MFCC matrix, shows it as an image with a colour
    bar (``plt.show()`` blocks until the window is closed), then prints the
    shape of the first row of the transposed matrix and the result of
    ``deltas_calc`` on that row.

    Args:
        fileWav: File name without directory and without the ``.wav`` suffix.
    """
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    path = file_wav_dir + fileWav + ".wav"
    # The file is still opened with Wave_read so an unreadable WAV fails at
    # the same point as before, but the handle is released right away.
    Wave_read(path).close()
    sample_rate, wav_data = read(path)
    mfcc_feat, mspec, spec = mfcc(wav_data, fs=sample_rate)
    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print(mfcc_feat.shape)

    plt.imshow(mfcc_feat.T, aspect='auto')
    plt.colorbar()
    plt.show()

    mfcc_feat = np.transpose(mfcc_feat)
    print(mfcc_feat[0, :].shape)
    v1 = deltas_calc(mfcc_feat[0, :])
    print(v1)
示例#15
0
def transform_nparray(orignal_wave: wave.Wave_read) -> Tuple[np.ndarray, int]:
    """Read a wave from its current position and expose it as an int16 array.

    Parameters
    ----------
    orignal_wave : wave.Wave_read
        Open reader; ``getnframes()`` frames are requested from it.

    Returns
    -------
    narray : ndarray
        1-d int16 view over the bytes that were read (channels stay
        interleaved, as delivered by ``readframes``).
    narray_frame : int
        Frame count reported by the reader.
    """
    frame_count = orignal_wave.getnframes()
    raw_bytes = orignal_wave.readframes(frame_count)
    return np.frombuffer(raw_bytes, dtype="int16"), frame_count
 def _readAudioFile(self,fileWav):
     """Load ``fileWav`` and store its normalised MFCC features on the instance.

     Sets ``nframes``, ``duration``, ``mfcc_feature`` (rows 1..12 of the
     transposed MFCC matrix, divided by ``factor_mfcc``), ``factor_mfcc``
     (largest absolute value of those rows), ``velocity_mfcc`` and
     ``acceleration_mfcc``.  Reads ``self.position.shape[1]`` to size the
     analysis window, so ``self.position`` must be set beforehand.
     """
     # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
     wav_file = Wave_read(fileWav)
     try:
         self.nframes = wav_file.getnframes()
     finally:
         # Only the frame count is needed from this reader; release the handle.
         wav_file.close()
     sample_rate, wav_data = read(fileWav)
     self.duration = self.nframes / float(sample_rate)

     # winlen = length of one articulatory frame: the duration is divided
     # evenly over the columns of self.position.
     winlen = round(self.duration / self.position.shape[1], 6)
     # TODO: window length and window step still need a proper definition;
     # presumably the last two arguments are (window length, window step).
     mfcc_feat = mfcc(wav_data, sample_rate, 2 * winlen, winlen)

     mfcc_feat = np.transpose(mfcc_feat)
     self.mfcc_feature = mfcc_feat[1:13]

     self.factor_mfcc = abs(self.mfcc_feature).max()
     self.mfcc_feature = self.mfcc_feature / self.factor_mfcc  # normalize in [-1,1]

     veloc, accel = self._get_velocity_acceleration(self.mfcc_feature)
     self.velocity_mfcc = veloc
     self.acceleration_mfcc = accel
示例#17
0
def encode_audio(wav: wave.Wave_read) -> bytes:
    """Encode ``wav`` with the native ``enclib`` encoder and return header + payload.

    The encoder is initialised with the file's frame rate, then fed one chunk
    at a time from ``iter_wav_data(wav, CHUNK_SIZE, CHUNK_SIZE)``.  After each
    ``audio_encode`` call the library's ``gl_out_words`` buffer is appended to
    the payload.  Finally a header built by ``get_file_header`` from the frame
    rate, the library's ``gl_frame_cnt`` and the words-per-frame value is
    prepended.

    Progress and debugging information is printed to stdout.

    NOTE(review): the payload is a ``bytearray``; whether the returned value
    is really ``bytes`` depends on the type ``get_file_header`` returns.
    """
    
    print('audio_encode_init {} {}'.format(wav.getframerate(), wav.getframerate() // 50))
    enclib.audio_encode_init(c_int(wav.getframerate()))
    
    # Size of one encoded frame, published by the library as a global.
    words_per_frame = c_int.in_dll(enclib, 'gl_number_of_16bit_words_per_frame').value
    in_data = FLOATARRAY_TYPE()
    data = bytearray()
    nn = 0  # chunk counter, only referenced by the commented-out debug output
    #print(FLOATARRAY_TYPE.from_buffer_copy)
    # The third argument asks iter_wav_data for one extra chunk of CHUNK_SIZE
    # zero samples after the audio.
    for n, c in enumerate(iter_wav_data(wav, CHUNK_SIZE, CHUNK_SIZE)):
        # Split every sample into two consecutive input slots: the low byte,
        # then the remaining high bits (negative for negative samples).
        for i, s in enumerate(c):
            in_data[i*2] = s & 0xff
            in_data[i*2+1] = s >> 8
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        if n == 0:
            # Dump the encoder history once, before the first encode call.
            print('gl_history={}'.format(hexlify(gl_history)))
        result = enclib.audio_encode(in_data)  
        # Output buffer of the library: words_per_frame 16-bit words, viewed as bytes.
        gl_out_words = (c_uint8 * (words_per_frame * 2)).in_dll(enclib, 'gl_out_words')
        # The next three values are only read for the commented-out debug output below.
        gl_mlt_coefs = (c_uint8 * 640).in_dll(enclib, 'gl_mlt_coefs')
        gl_history = (c_uint8 * 640).in_dll(enclib, 'gl_history')
        gl_mag_shift = c_int.in_dll(enclib, 'gl_mag_shift').value
        #print('gl_mag_shift={}'.format(gl_mag_shift))
        #if nn < 2:
            #print('gl_mlt_coefs={}'.format(hexlify(gl_mlt_coefs)))
            #print('gl_history={}'.format(hexlify(gl_history)))
            #print("in_data: len={} {}".format(len(in_data), hexlify(in_data)))
            #print("out_data: len={} {}".format(len(gl_out_words), hexlify(gl_out_words)))
        # Copy the bytes out now; the buffer belongs to the library.
        data.extend(gl_out_words[:])
        nn += 1
    #print('nn: {}'.format(nn))
    # Frame count as tracked by the library itself.
    nframes = c_int.in_dll(enclib, 'gl_frame_cnt').value
    
    print('nframes: {} words_per_frame: {}'.format(nframes, words_per_frame))
    header = get_file_header(sample_rate=wav.getframerate(), frames = nframes, words_per_frame = words_per_frame)
    print('data len: {}'.format(len(data)))
    return header + data
示例#18
0
    def encode_chunk(self, thread_id: str, file: Wave_read,
                     total_samples_to_read: int, output: BytesIO) -> None:
        """Pipe frames from ``file`` through the external encoder into ``output``.

        ``self.command`` is started as a child process with a hidden window
        (the ``STARTUPINFO`` flags used here exist on Windows only).  Frames
        read from ``file`` are written to the child's stdin while a helper
        thread copies the child's stdout into ``output``.  Progress in percent
        is reported through ``self.listener.encode_update`` whenever it
        changes, and once more with 100 at the end.  ``file`` is closed before
        returning.

        NOTE(review): stderr is piped but never read, and the child process is
        never waited for; a child that writes a lot to stderr could block.
        NOTE(review): how many frames are written depends on what
        ``update_samples_to_read`` returns, which is not visible here.
        """
        # Start the encoder without showing a console window.
        options = STARTUPINFO()
        options.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        options.wShowWindow = subprocess.SW_HIDE
        process = Popen(self.command,
                        stdin=PIPE,
                        stdout=PIPE,
                        stderr=PIPE,
                        startupinfo=options)

        # Collect the encoder output in the background while stdin is being fed.
        read_data_thread = Thread(
            target=lambda: output.write(process.stdout.read()))
        read_data_thread.daemon = True
        read_data_thread.start()

        samples_to_read, samples_left = self.update_samples_to_read(
            total_samples_to_read, 1024)
        last_progress = 0
        while samples_left > 0:
            process.stdin.write(file.readframes(samples_to_read))

            progress = int((total_samples_to_read - samples_left) * 100 /
                           total_samples_to_read)
            # Only notify the listener when the integer percentage changes.
            if progress != last_progress:
                self.listener.encode_update(thread_id, progress)
                last_progress = progress

            samples_to_read, samples_left = self.update_samples_to_read(
                samples_left, 1024)

        self.listener.encode_update(thread_id, 100)
        # Closing stdin signals end of input; the reader thread finishes once
        # the child closes its stdout.
        process.stdin.close()
        read_data_thread.join()
        process.stdout.close()
        process.stderr.close()
        file.close()
def print_audio_samples(wave_read: wave.Wave_read,
                        pos_sec=0,
                        steps=1,
                        length_ms=2_000):
    """Print the first byte of selected frames as one comma-separated line.

    The inspected range starts ``pos_sec`` seconds into the file and spans
    ``length_ms`` milliseconds; every ``steps``-th frame in it is visited.
    A line describing the range is printed first.
    """
    frames_per_second = wave_read.getframerate()
    first_frame = frames_per_second * pos_sec
    wave_read.readframes(first_frame)
    last_frame = first_frame + (frames_per_second * length_ms // 1000)
    print("Reading from = %s to = %s, with step = %s" %
          (first_frame, last_frame, steps))

    def first_byte_at(position):
        # Jump to the frame and return its first byte as text.
        wave_read.setpos(position)
        return str(wave_read.readframes(1)[0])

    print(','.join(
        [first_byte_at(position)
         for position in range(first_frame, last_frame, steps)]))
示例#20
0
    def _send_packet(self, wave_file: wave.Wave_read, first_packet: bool,
                     transport) -> int:
        """Read one packet's worth of frames, wrap and send it over ``transport``.

        Returns the number of frames sent, or 0 when the file is exhausted or
        the transport is closing.  On success the packet is stored in the
        backlog under its sequence number, the sequence number is advanced
        (wrapping at 2**16) and ``head_ts`` grows by the number of frames.
        """
        frames = wave_file.readframes(FRAMES_PER_PACKET)
        if not frames:
            return 0

        ctx = self.context
        marker = 0xE0 if first_packet else 0x60
        header = AudioPacketHeader.encode(
            0x80,
            marker,
            ctx.rtpseq,
            ctx.rtptime,
            ctx.session_id,
        )

        # ALAC frame with raw data. Not so pretty but will work for now until a
        # proper ALAC encoder is added.
        preamble = "00" + str(ctx.channels - 1) + 19 * "0" + "1"
        audio = bitarray(preamble)
        # Append the payload with the two bytes of every pair swapped.
        for offset in range(0, len(frames), 2):
            audio.frombytes(bytes((frames[offset + 1], frames[offset])))

        if transport.is_closing():
            _LOGGER.warning("Connection closed while streaming audio")
            return 0

        packet = header + audio.tobytes()

        # Add packet to backlog before sending
        self._packet_backlog[ctx.rtpseq] = packet
        transport.sendto(packet)

        ctx.rtpseq = (ctx.rtpseq + 1) % (2**16)
        frames_sent = int(len(frames) / (ctx.channels * ctx.bytes_per_channel))
        ctx.head_ts += frames_sent
        return frames_sent
示例#21
0
 def __init__(self,filename):
     """Initialise the instance by passing ``filename`` to ``Wave_read.__init__``."""
     # Explicit base-class call; nothing else is set up here.
     Wave_read.__init__(self,filename)
示例#22
0
def time_labels(wave_file: wave.Wave_read, points=None):
    """Return ``points`` evenly spaced time stamps covering the whole file.

    The stamps run from 0 to the file duration (frame count divided by frame
    rate), both ends included.  When ``points`` is None one stamp per frame
    is produced.
    """
    frame_total = wave_file.getnframes()
    count = frame_total if points is None else points
    duration = frame_total / wave_file.getframerate()
    return np.linspace(0, duration, num=count)
示例#23
0
 def __samples_to_millis(wav_file: Wave_read, samples: int) -> int:
     """Convert a sample count to whole milliseconds at the file's frame rate.

     The fractional part of the result is truncated.
     """
     seconds = samples / wav_file.getframerate()
     return int(seconds * 1000)
示例#24
0
def main(args):
    """Run voice-activity detection on every ``*.wav`` file in the current directory.

    For each file (in sorted name order) the speech segments found by
    ``vad_collector`` are written to ``edited_<name>``, and the start and end
    of the speech are printed.  Afterwards ``dataset.txt`` is written with one
    line per file::

        <name up to the first dot> <speech start sample> <speech end sample> <total frames>

    The start and end values come from the last entries of the module-level
    lists ``stm`` and ``etm``, which ``vad_collector`` fills in; the last
    entry is used because earlier entries may refer to noise rather than
    speech.

    NOTE(review): the printed messages label these values as milliseconds,
    yet they are multiplied by the sample rate without scaling to obtain
    sample positions - confirm the unit of ``stm`` / ``etm``.

    Args:
        args: Unused.
    """
    # One (name, speech start sample, speech end sample, total frames) per file.
    records = []

    for name in sorted(glob.glob("*.wav")):
        # Read length and sample rate from the header, then release the file.
        with wave.open(name) as vc:
            total_frames = vc.getnframes()
            file_rate = vc.getframerate()

        # read_wave returns the raw audio together with its sample rate.
        audio, sample_rate = read_wave(name)

        # Aggressiveness 3 is the strictest setting webrtcvad offers.
        vad = webrtcvad.Vad(3)
        # Split the audio into frames (first argument is the window size).
        frames = list(frame_generator(10, audio, sample_rate))

        # Detect speech; every segment is written to the same output path,
        # so the file ends up holding the last segment.
        segments = vad_collector(sample_rate, 30, 300, vad, frames)
        for segment in segments:
            write_wave('edited_' + name, segment, sample_rate)

        # stm / etm must be read only after the segments were consumed.
        start_time = stm[-1]
        print('start time (ms) of speech ', name, ' is', start_time)
        end_time = etm[-1]
        print('end time (ms) of speech ', name, ' is', end_time)

        records.append((name.split('.')[0],
                        start_time * file_rate,
                        end_time * file_rate,
                        total_frames))

    # Written only after every file was processed, as before.
    with open('dataset.txt', 'w') as f:
        for stem, start_sample, end_sample, total_frames in records:
            f.write(stem + ' ' + str(int(start_sample)) + ' ' +
                    str(int(end_sample)) + ' ' + str(total_frames) + '\n')