Example #1
def time_labels_interval(wf: wave.Wave_read, seconds, points=None):
    if seconds:
        labels = np.arange(seconds[0], seconds[1], 1. / wf.getframerate())
        if points:
            start = int((len(labels) - points) / 2)
            end = start + points
            return labels[start:end]
        else:
            return labels
    else:
        if points is None:
            points = wf.getnframes()
        labels = np.linspace(0, wf.getnframes() / wf.getframerate(), num=points)
        return labels
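A brief usage sketch, assuming numpy is imported as np and a file at the hypothetical path input.wav: label the samples lying between 1.0 s and 2.0 s.

import wave
import numpy as np

with wave.open("input.wav", "rb") as wf:  # hypothetical input file
    labels = time_labels_interval(wf, seconds=(1.0, 2.0))
    print(labels[0], labels[-1], len(labels))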
Example #2
File: peaks.py  Project: tomas-jezek/Python
def fourier(audio: wave.Wave_read) -> Tuple[Optional[int], Optional[int]]:
    """Fourier analysis of the input data, returning the (lowest, highest) frequency."""
    # data
    length = audio.getnframes()
    sample_rate = audio.getframerate()
    windows_count = length // sample_rate
    channels = 1 if audio.getnchannels() == 1 else 2  # Stereo (2) vs. Mono (1)
    frames = sample_rate * windows_count

    data = np.array(unpack(f"{channels * frames}h", audio.readframes(frames)))
    if channels == 2:
        data = merge_channels(data)

    # amplitudes
    low, high = None, None
    for i in range(windows_count):
        bounds = (i * sample_rate, i * sample_rate + sample_rate)
        window = data[bounds[0]:bounds[1]]
        amplitudes = np.abs(np.fft.rfft(window))
        average = np.average(amplitudes)

        # peaks: an amplitude counts as a peak when it is at least
        # 20x the window average (threshold taken from the assignment)
        peak = lambda amp: amp >= 20 * average
        for j in range(len(amplitudes)):
            amplitude = amplitudes[j]
            if not peak(amplitude):
                continue
            if low is None:
                low = j
                high = j
            else:
                high = j
    if low is None:
        return None, None
    return (high, low) if high < low else (low, high)  # the pair may be swapped
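Since each analysis window is exactly one second long (sample_rate samples), an rfft bin index corresponds directly to a frequency in Hz. A brief usage sketch, assuming a 16-bit WAV at the hypothetical path input.wav and that merge_channels (a helper from the same project) downmixes stereo to mono:

import wave

with wave.open("input.wav", "rb") as audio:
    low, high = fourier(audio)
    print(f"lowest peak: {low} Hz, highest peak: {high} Hz")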
Example #3
def remove_data(fileWav, gap):
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    wav_file = Wave_read(file_wav_dir + fileWav + ".wav")
    nframes = wav_file.getnframes()
    sample_rate, wav_data = read(file_wav_dir + fileWav + ".wav")
    print(wav_data.dtype)
    print(wav_data.min(), wav_data.max())
    plt.plot(wav_data)
    plt.show()
    start = 0
Example #4
def read(file: wave.Wave_read):
    """
    Reads the file and builds an AudioData object from its frames.
    Returns that AudioData.
    """
    params = file.getparams()
    frames_number = file.getnframes()
    frames = file.readframes(frames_number)
    characters_per_frame = len(frames) // frames_number
    framesdata = split_frames_into_sounds(frames, characters_per_frame)
    return AudioData(params, framesdata)
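split_frames_into_sounds and AudioData are helpers from the same project, not shown here; a plausible minimal sketch of split_frames_into_sounds, assuming it simply slices the raw bytes into one chunk per frame:

def split_frames_into_sounds(frames: bytes, characters_per_frame: int):
    # Slice the raw byte string into one bytes object per frame (assumed behavior).
    return [frames[i:i + characters_per_frame]
            for i in range(0, len(frames), characters_per_frame)]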
Example #5
def print_audio_samples_all(wave_read: wave.Wave_read):
    n = wave_read.getnframes()
    buffer = []
    count = 0
    for i in range(n):
        sample = wave_read.readframes(1)
        # interpret the raw frame bytes as an unsigned little-endian integer
        int_version = int.from_bytes(sample, byteorder='little')
        if int_version == 0:
            count += 1
        if i % 100 == 0:  # keep every 100th sample
            # if int_version > (1 << 15): int_version = (1 << 15) - int_version
            buffer.append(int_version)
    print(buffer)
    print(count)
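Note that int.from_bytes above yields unsigned values (the commented-out line hints at the wrap-around). For signed 16-bit PCM, a sketch like the following, assuming a mono 16-bit file, decodes the samples correctly:

import wave

def print_signed_samples(path: str):
    # Print every 100th sample of a mono 16-bit WAV as a signed integer.
    with wave.open(path, 'rb') as wf:
        assert wf.getsampwidth() == 2 and wf.getnchannels() == 1
        for i in range(wf.getnframes()):
            frame = wf.readframes(1)
            if i % 100 == 0:
                print(int.from_bytes(frame, byteorder='little', signed=True))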
Example #6
def iter_wav_data(wav: wave.Wave_read, chunk_size: int, min_padding=0):
    wav.rewind()
    nchunks = wav.getnframes() // chunk_size
    for n in range(nchunks):
        d = wav.readframes(chunk_size)
        # readframes returns bytes (2 bytes per 16-bit frame), so compare
        # against the byte count and pad short chunks with zero frames
        if len(d) < chunk_size * 2:
            d += b'\0\0' * (chunk_size - len(d) // 2)
        a = array.array('h')
        a.frombytes(d)
        yield a
    if min_padding:
        a = array.array('h')
        a.frombytes(b'\0\0' * min_padding)
        yield a
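A minimal usage sketch, assuming a 16-bit mono file at the hypothetical path input.wav:

import wave

with wave.open("input.wav", "rb") as wf:
    for chunk in iter_wav_data(wf, chunk_size=1024, min_padding=1024):
        print(len(chunk), max(chunk))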
Example #7
def trim(sound_file: wave.Wave_read, ratio, new_file_path):
    """
    Creates a new trimmed file out of the given one
    :param sound_file: Source file
    :param ratio: The ratio by which the function trims
    :param new_file_path: Path to the output file
    """
    frame_count = sound_file.getnframes()
    target_frame_count = int(frame_count * ratio)

    new_frames = sound_file.readframes(target_frame_count)
    with wave.open(new_file_path, 'wb') as new_file:
        new_file.setparams(sound_file.getparams())
        new_file.writeframes(new_frames)
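A quick usage sketch, keeping the first half of a file at the hypothetical path input.wav:

import wave

with wave.open("input.wav", "rb") as src:
    trim(src, ratio=0.5, new_file_path="first_half.wav")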
Example #8
def readAudioFile(fileWav):
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    wav_file = Wave_read(file_wav_dir + fileWav + ".wav")
    nframes = wav_file.getnframes()
    sample_rate, wav_data = read(file_wav_dir + fileWav + ".wav")
    mfcc_feat, mspec, spec = mfcc(wav_data, fs=sample_rate)
    print(mfcc_feat.shape)
    #fbank_feat = logfbank(wav_data, sample_rate)
    #print(fbank_feat[1:3,:])

    plt.imshow(mfcc_feat.T, aspect='auto')
    plt.colorbar()
    plt.show()

    mfcc_feat = np.transpose(mfcc_feat)
    print(mfcc_feat[0,:].shape)
    v1 = deltas_calc(mfcc_feat[0,:])
    print(v1)
Example #9
def transform_nparray(original_wave: wave.Wave_read) -> Tuple[np.ndarray, int]:
    """Transform a wave file into an ndarray.

    Parameters
    ----------
    original_wave : wave.Wave_read
        wave_read object

    Returns
    -------
    narray : ndarray
        1-d array of 16-bit samples
    narray_frame : int
        frame length
    """

    narray_frame = original_wave.getnframes()
    narray = original_wave.readframes(narray_frame)
    narray = np.frombuffer(narray, dtype="int16")

    return narray, narray_frame
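A short usage sketch, assuming a mono 16-bit file at the hypothetical path input.wav:

import wave
import numpy as np

with wave.open("input.wav", "rb") as wf:
    samples, n_frames = transform_nparray(wf)
    duration = n_frames / wf.getframerate()
    print(f"{n_frames} frames, {duration:.2f} s, peak {np.abs(samples).max()}")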
Example #10
def _readAudioFile(self, fileWav):
    # http://stackoverflow.com/questions/2060628/how-to-read-wav-file-in-pythons
    wav_file = Wave_read(fileWav)
    self.nframes = wav_file.getnframes()
    sample_rate, wav_data = read(fileWav)
    self.duration = self.nframes / float(sample_rate)

    winlen = round(self.duration / self.position.shape[1], 6)  # winlen = length of articulatory frames
    mfcc_feat = mfcc(wav_data, sample_rate, 2 * winlen, winlen)  # window length = 2 * winlen, window step = winlen
    #fbank_feat = logfbank(wav_data, sample_rate)
    #print(fbank_feat[1:3,:])
    #plt.plot(mfcc_feat)

    mfcc_feat = np.transpose(mfcc_feat)
    self.mfcc_feature = mfcc_feat[1:13]

    self.factor_mfcc = abs(self.mfcc_feature).max()
    self.mfcc_feature = self.mfcc_feature / self.factor_mfcc  # normalize to [-1, 1]

    veloc, accel = self._get_velocity_acceleration(self.mfcc_feature)
    self.velocity_mfcc = veloc
    self.acceleration_mfcc = accel
Example #11
def time_labels(wave_file: wave.Wave_read, points=None):
    if points is None:
        points = wave_file.getnframes()
    ts = np.linspace(0, wave_file.getnframes() / wave_file.getframerate(), num=points)
    return ts
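A minimal usage sketch, assuming matplotlib and a mono 16-bit file at the hypothetical path input.wav, pairing the labels with the samples:

import wave
import numpy as np
import matplotlib.pyplot as plt

with wave.open("input.wav", "rb") as wf:
    samples = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
    ts = time_labels(wf, points=len(samples))
plt.plot(ts, samples)
plt.xlabel("time (s)")
plt.show()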
Example #12
def main(args):

    # information about each voice file (file name + start sample of speech +
    # end sample of speech + last sample of file) written to dataset.txt
    info = []

    # input directory
    dir_files = glob.glob("*.wav")

    # sort input (not necessary)
    dir_files.sort()

    # array holding the last sample index of each file
    end_sample_file = []

    # start sample of speech in each file
    start_sample_speech = []

    # end sample of speech in each file
    end_sample_speech = []

    # name of the original file with the extension cut off (not necessary)
    fileName = []

    # start time (ms) of speech in each voice file
    st = []

    # end time (ms) of speech in each voice file
    et = []

    # sample rate of each voice file
    sample_rates = []
    # loop counter
    count = 0
    # loop over the directory
    for n in dir_files:
        # open the voice file
        vc = wave.open(n)

        # append the file's last sample index
        end_sample_file.append(vc.getnframes())

        # append the file's sample rate
        sample_rates.append(vc.getframerate())

        # read_wave is a helper that takes a voice file path and returns
        # the audio (raw PCM data) and its sample rate
        audio, sample_rate = read_wave(n)

        # webrtcvad.Vad takes an aggressiveness parameter (an integer between 0 and 3)
        vad = webrtcvad.Vad(3)
        # generate frames (the first parameter is the window size in ms)
        frames = frame_generator(10, audio, sample_rate)
        frames = list(frames)

        # the main call that recognizes speech in the voice file
        segments = vad_collector(sample_rate, 30, 300, vad, frames)

        # write a new voice file with the unvoiced parts of the original cut out
        for i, segment in enumerate(segments):
            path = 'edited_' + n
            write_wave(path, segment, sample_rate)

        # split the file name from the extension of the original file (not necessary)
        temp_str = n.split('.')
        fileName.append(temp_str[0])

        # start time (ms) of speech in the voice file
        st.append(stm[-1])
        print('start time (ms) of speech ', n, ' is', st[-1])
        # end time (ms) of speech in the voice file
        et.append(etm[-1])
        print('end time (ms) of speech ', n, ' is', et[-1])

        # note: stm and etm, used inside the vad_collector function, hold the
        # start and end times of the voice file. Because of noise in the file
        # these variables may pick up a noise time instead of a speech time,
        # but the last position in each array always holds a speech time.
        # More information in the vad_collector function.
        count = count + 1

    # convert every start time of speech to a sample index and save it in start_sample_speech
    for i in range(len(st)):
        start_sample_speech.append(st[i] * sample_rates[i])

    # convert every end time of speech to a sample index and save it in end_sample_speech
    for i in range(len(et)):
        end_sample_speech.append(et[i] * sample_rates[i])

    # fill in the information for each voice file
    for i in range(len(fileName)):
        info.append(fileName[i] + ' ' + str(int(start_sample_speech[i])) + ' ' + str(int(end_sample_speech[i])) + ' ' + str(end_sample_file[i]))

    # write the info to the file
    f = open('dataset.txt', 'w')
    for n in info:
        f.write(n + '\n')
    f.close()
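read_wave, write_wave, frame_generator, and vad_collector are assumed to be the helpers from the py-webrtcvad example script; a minimal sketch of read_wave under that assumption:

import contextlib
import wave

def read_wave(path):
    # Read a mono 16-bit WAV file and return (PCM bytes, sample rate).
    with contextlib.closing(wave.open(path, 'rb')) as wf:
        assert wf.getnchannels() == 1
        assert wf.getsampwidth() == 2
        sample_rate = wf.getframerate()
        pcm_data = wf.readframes(wf.getnframes())
        return pcm_data, sample_rate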