from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer
from scipy.io import wavfile
import matplotlib.pyplot as plt


def split(filename='g1238-20181214-081712-1544750232.37681.wav'):
    sr, samples = wavfile.read(filename=filename, mmap=True)
    #print(len(samples))
    plt.plot(samples)
    asource = ADSFactory.ads(filename=filename, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)
    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
    # min_length=100 : minimum length of a valid audio activity is 100 * 10 == 1000 ms
    # max_length=500 : maximum length of a valid audio activity is 500 * 10 == 5000 ms == 5 seconds
    # max_continuous_silence=50 : maximum length of a tolerated silence within a valid audio activity is 50 * 10 == 500 ms
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=100,
                                max_length=500,
                                max_continuous_silence=50)
    asource.open()
    tokens = tokenizer.tokenize(asource)
    stack = []
    durations = []  # renamed from 'sum' to avoid shadowing the built-in
    for i, t in enumerate(tokens):
        #print("Token [{0}] starts at {1} and ends at {2}".format(i+1, t[1], t[2]))
        # each 10 ms analysis window spans 80 samples at 8000 Hz
        stack.append([t[1] * 80, t[2] * 80])
        durations.append((t[2] * 80 - t[1] * 80) / 8000)  # duration in seconds
        wavfile.write('token_' + str(i) + '.wav', sr,
                      samples[t[1] * 80:t[2] * 80])  # write each token to file
    asource.close()
    print(durations)
    return stack
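A quick check of the unit arithmetic above (a sketch, not part of the original; it assumes the 8000 Hz file used by the example): one 10 ms analysis window at 8000 Hz spans 80 samples, so token bounds * 80 are sample offsets and dividing by the sample rate gives seconds.

boundaries = split()  # uses the example's default filename
for start_sample, end_sample in boundaries:
    print('segment {0:.2f}s - {1:.2f}s'.format(start_sample / 8000, end_sample / 8000))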
Example #2
    def transcribe_audio(self, stereo_path, channels_to_process):

        if not os.path.isfile(stereo_path):
            raise FileNotFoundError("Audio file does not exist: " + stereo_path)

        data = self.split_to_mono(stereo_path)

        a_leg = data['a_leg']
        b_leg = data['b_leg']

        data['a_leg'] = None
        data['b_leg'] = None

        validator = AudioEnergyValidator(sample_width=data['frame_width'], energy_threshold=45)
        trimmer = StreamTokenizer(validator,
                                  min_length=self.min_segment_length,
                                  max_length=self.max_segment_length,
                                  max_continuous_silence=self.max_continuous_silence,
                                  mode=StreamTokenizer.DROP_TRAILING_SILENCE)

        segments = []
        if channels_to_process in ['A', 'AB']:
            a_source = ADSFactory.ads(audio_source=a_leg, record=True, block_size=int(data['frame_rate'] / self.divisor))
            a_source.open()
            trimmer.tokenize(a_source, callback=lambda frames, start, end: segments.append(("A", frames, start, end)))

        if channels_to_process in ['B', 'AB']:
            b_source = ADSFactory.ads(audio_source=b_leg, record=True, block_size=int(data['frame_rate'] / self.divisor))
            b_source.open()
            trimmer.tokenize(b_source, callback=lambda frames, start, end: segments.append(("B", frames, start, end)))

        segments = sorted(segments, key=lambda x: x[3])  # order by end frame across both channels
        self.batch(segments, data['duration'], data['frame_rate'], data['frame_width'], data['nchannels'])
Example #3
def make_auditok_detector(sample_rate=100):
    bytes_per_frame = 2
    # FRAME_RATE is a module-level constant in the surrounding project
    # (the audio frame rate in Hz); frames_per_window is frames per analysis window
    frames_per_window = FRAME_RATE // sample_rate
    validator = AudioEnergyValidator(sample_width=bytes_per_frame,
                                     energy_threshold=50)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=0.2 * sample_rate,
                                max_length=int(5 * sample_rate),
                                max_continuous_silence=0.25 * sample_rate)

    def _detect(asegment):
        asource = BufferAudioSource(data_buffer=asegment,
                                    sampling_rate=FRAME_RATE,
                                    sample_width=bytes_per_frame,
                                    channels=1)
        ads = ADSFactory.ads(audio_source=asource, block_dur=1. / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)
        length = (len(asegment) // bytes_per_frame + frames_per_window -
                  1) // frames_per_window
        media_bstring = np.zeros(length + 1, dtype=int)
        for token in tokens:
            media_bstring[token[1]] += 1
            media_bstring[token[2] + 1] -= 1
        return np.cumsum(media_bstring)[:-1] > 0

    return _detect
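The difference-array trick in _detect is worth a standalone illustration (a sketch with made-up token indices): each token adds 1 at its start window and subtracts 1 just past its end window, so the running sum is positive exactly inside detected speech.

import numpy as np

tokens = [(None, 2, 4), (None, 7, 8)]  # hypothetical (data, start, end) tuples
media_bstring = np.zeros(10 + 1, dtype=int)
for token in tokens:
    media_bstring[token[1]] += 1       # +1 at segment start
    media_bstring[token[2] + 1] -= 1   # -1 just past segment end
print((np.cumsum(media_bstring)[:-1] > 0).astype(int))
# -> [0 0 1 1 1 0 0 1 1 0]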
Example #4
def getSplitAudioDurationListBetweenSilence(fileName, eachAudioLen, silencePeriod, energyThreshold=55):
    tokens = []
    try:
        # record=False: the stream is read only once, so no rewind is needed
        asource = ADSFactory.ads(filename=fileName, record=False)

        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energyThreshold)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=400 : minimum length of a valid audio activity is 400 * 10 == 4000 ms
        # max_length=eachAudioLen*100 : maximum length of a valid audio activity is eachAudioLen seconds
        # max_continuous_silence=silencePeriod*100 : maximum tolerated silence within an activity is silencePeriod seconds
        tokenizer = StreamTokenizer(validator=validator, min_length=400, max_length=eachAudioLen * 100, max_continuous_silence=silencePeriod * 100)

        asource.open()
        tokens = tokenizer.tokenize(asource)

        # (Optional) detected regions could be played back by rewinding the
        # source and feeding each token to player_for(asource); omitted here.

        asource.close()
    except KeyboardInterrupt:
        asource.close()
    except Exception as e:
        sys.stderr.write(str(e) + "\n")
    return tokens
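A hypothetical call (a sketch; 'call.wav' is a made-up path): with eachAudioLen=10 and silencePeriod=1 the tokenizer keeps activities between 4 and 10 seconds long and tolerates up to 1 second of silence inside an activity.

tokens = getSplitAudioDurationListBetweenSilence('call.wav', eachAudioLen=10, silencePeriod=1)
for i, t in enumerate(tokens):
    print('token {0}: windows {1}-{2}'.format(i + 1, t[1], t[2]))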
Example #5
def _make_auditok_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    try:
        from auditok import (
            BufferAudioSource,
            ADSFactory,
            AudioEnergyValidator,
            StreamTokenizer,
        )
    except ImportError as e:
        logger.error(
            """Error: auditok not installed!
        Consider installing it with `pip install auditok`. Note that auditok
        is GPLv3 licensed, which means that successfully importing it at
        runtime creates a derivative work that is GPLv3 licensed. For personal
        use this is fine, but note that any commercial use that relies on
        auditok must be open source as per the GPLv3!*
        *Not legal advice. Consult with a lawyer.
        """
        )
        raise e
    bytes_per_frame = 2
    frames_per_window = frame_rate // sample_rate
    validator = AudioEnergyValidator(sample_width=bytes_per_frame, energy_threshold=50)
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=0.2 * sample_rate,
        max_length=int(5 * sample_rate),
        max_continuous_silence=0.25 * sample_rate,
    )

    def _detect(asegment: bytes) -> np.ndarray:
        asource = BufferAudioSource(
            data_buffer=asegment,
            sampling_rate=frame_rate,
            sample_width=bytes_per_frame,
            channels=1,
        )
        ads = ADSFactory.ads(audio_source=asource, block_dur=1.0 / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)
        length = (
            len(asegment) // bytes_per_frame + frames_per_window - 1
        ) // frames_per_window
        media_bstring = np.zeros(length + 1)
        for token in tokens:
            media_bstring[token[1]] = 1.0
            media_bstring[token[2] + 1] = non_speech_label - 1.0
        return np.clip(np.cumsum(media_bstring)[:-1], 0.0, 1.0)

    return _detect
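A minimal smoke test of the returned detector (a sketch; sample_rate=100, frame_rate=48000 and non_speech_label=0.5 are assumed values, and auditok must be installed): one second of 16-bit silence should produce a 100-window mask that never rises above zero.

detect = _make_auditok_detector(sample_rate=100, frame_rate=48000, non_speech_label=0.5)
silence = bytes(48000 * 2)            # one second of 16-bit zeros at 48 kHz
mask = detect(silence)
print(mask.shape, float(mask.max()))  # (100,) 0.0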
Example #6
def read_split_dir(file):
    f = sf.SoundFile(file)
    #duration of file in seconds
    duration = len(f) / f.samplerate

    if duration <= 4:
        print(file, 'untouched')
    else:
        #Get original filename
        name = os.path.splitext(file)[0]

        tempsound = AudioSegment.from_wav(file)
        tempsound = tempsound.set_channels(1)
        tmpfile = '0wavtmp_' + file
        tempsound.export(tmpfile, format="wav")

        # We set the `record` argument to True so that we can rewind the source
        asource = ADSFactory.ads(filename=tmpfile, record=True)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(), energy_threshold=50)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=500 : minimum length of a valid audio activity is 500 * 10 == 5000 ms
        # max_length=4000 : maximum length of a valid audio activity is 4000 * 10 == 40000 ms == 40 seconds
        # max_continuous_silence=100 : maximum tolerated silence within an activity is 100 * 10 == 1000 ms
        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=500,
                                    max_length=4000,
                                    max_continuous_silence=100)

        asource.open()
        tokens = tokenizer.tokenize(asource)

        for index, t in enumerate(tokens):
            #print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
            newAudio = AudioSegment.from_wav(file)
            # token bounds are 10 ms analysis windows; pydub slices in milliseconds
            newAudio = newAudio[t[1] * 10:t[2] * 10]

            chunk_name = "{}_clip{}.wav".format(name, index)
            print("Generating", chunk_name)
            newAudio.export(
                chunk_name,
                format="wav")  #Exports to a wav file in the current path.

        #Remove the temporary file we made earlier
        os.remove(tmpfile)
        #Remove the original file to avoid confusion
        os.remove(file)
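Despite its name, read_split_dir processes a single file; a minimal driver over a directory might look like this (a sketch; it assumes the function above is in scope, and note that it deletes the originals it splits):

import os

for entry in os.listdir('.'):
    if entry.endswith('.wav'):
        read_split_dir(entry)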
Example #7
    def __init__(self):
        self.asource = ADSFactory.ads(record=True, max_time=4)
        self.validator = AudioEnergyValidator(sample_width=2,
                                              energy_threshold=50)
        self.tokenizer = StreamTokenizer(validator=self.validator,
                                         min_length=20,
                                         max_length=1000,
                                         max_continuous_silence=30)
        self.player = player_for(self.asource)

        self.model = self.load_cnn('../model/final_cnn_model.json',
                                   '../model/weights_final_cnn.h5')
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='sgd',
                           metrics=['accuracy'])
Example #8
    def calibrate(self):
        '''
        This method calibrates the MinMaxScaler, self.scaler, by capturing 10 seconds
        of audio and applying MinMaxScaler fit method.
        See sklearn.preprocessing.MinMaxScaler for details.
        
        This is redundant, scaling is not necessary.
        
        '''
        input(
            "Calibrate normalisation, press return then make noises from your mouth hole."
        )
        if self.audioPath is None:
            asource = ADSFactory.ads(sampling_rate=self.sr, max_time=10)
        else:
            asource = ADSFactory.ads(filename=self.audioPath,
                                     sampling_rate=self.sr,
                                     max_time=10)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(),
            energy_threshold=self.energy)

        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=self.min_len,
                                    max_length=self.max_len,
                                    max_continuous_silence=self.max_con_si)

        def calib_callback(data, start, end):
            audio = np.frombuffer(data[0], dtype=np.int8)
            self.scaler.fit_transform(np.swapaxes(np.asarray([audio]), 0, 1))
            print("Audio sample found {0}--{1}".format(start, end))

        asource.open()

        tokenizer.tokenize(asource, callback=calib_callback)
        print "Scaler paramaters found: min: {0} max: {1}".format(
            self.scaler.data_min_, self.scaler.data_max_)

        print "calibration done"
        self.mini = self.scaler.data_min_
        self.maxi = self.scaler.data_max_
Example #9
    def __init__(self, _useGui):
        # audio parameters
        max_length = 1000000
        max_interval = 12000
        max_continuous_silence = 500
        min_length = 150

        self.sample_rate = 48000
        self.asource = ADSFactory.ads(record=True,
                                      max_time=max_length,
                                      sampling_rate=self.sample_rate)

        self.sample_width = self.asource.get_sample_width()
        self.channels = self.asource.get_channels()
        # START VALIDATOR (energy_threshold is a module-level setting in the original code)
        self.validator = AudioEnergyValidator(
            sample_width=self.sample_width, energy_threshold=energy_threshold)
        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=min_length,
            max_length=max_length,
            max_continuous_silence=max_continuous_silence)

        self.audio_folder = 'recordings/' + '{:%Y-%m-%d_%H-%M-%S}'.format(
            datetime.datetime.now()) + '/'
        if not os.path.exists(os.path.dirname(self.audio_folder)):
            try:
                os.makedirs(os.path.dirname(self.audio_folder))
            except OSError as exc:  # guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
        os.chmod('recordings', 0o777)
        os.chmod(self.audio_folder, 0o777)
        self.MODE = 'ECHO'

        self.useGui = _useGui

        if self.useGui:
            root = Tk()
            self.display = GUI(root, True)
            self.display.display_image()
Example #10
    def configure(self, rf):
        # Setting up rpc port
        self.portsList["rpc"] = yarp.Port()
        self.portsList["rpc"].open("/sentence_tokenizer/rpc:i")
        self.attach(self.portsList["rpc"])

        self.portsList["audio_out"] = yarp.BufferedPortBottle()
        self.portsList["audio_out"].open("/sentence_tokenizer/audio:o")

        # Setting up hotword detection
        self.hotword_detector = snowboydecoder.HotwordDetector(self.hotword_model, sensitivity=self.hotword_sensitivity)

        # Setting up audio tokenizer to split sentences
        self.audio_source = ADSFactory.ads(record=True, max_time=self.tok_record_duration, block_dur=self.tok_window)
        self.tok_validator = AudioEnergyValidator(sample_width=self.audio_source.get_sample_width(),
                                                  energy_threshold=self.tok_energy_threshold)
        self.tokenizer_mode = StreamTokenizer.DROP_TRAILING_SILENCE
        self.tokenizer = StreamTokenizer(validator=self.tok_validator,
                                         min_length=self.tok_min_len,
                                         max_length=self.tok_max_len,
                                         max_continuous_silence=self.tok_max_silence_duration,
                                         mode=self.tokenizer_mode)

        if self.echo_enabled:
            self.echo_thread = threading.Thread(target=self.replayAudio)
            self.echo_thread.start()

        if self.hotword_enabled:
            print("Waiting for hotword to start interaction")
            # self.hotword_detector.start(detected_callback=self.detected_callback,
            #                             interrupt_check=self.interrupt_callback,
            #                             sleep_time=self.hotword_loop_time)
            print("Hotword detected. Starting tokenizer thread")
        else:
            print "Starting tokenizer thread"

        self.asr = sr.Recognizer()

        with open('google_credentials.json', 'r') as credentials:
            self.google_credentials = credentials.read()
        return True
Example #11
'''
# record = True so that we'll be able to rewind the source.
asource = ADSFactory.ads(record=True)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)

player = player_for(asource)
asource.open()

tokenizer.tokenize(asource, callback=echo)
'''
asource = ADSFactory.ads(sampling_rate=16000,
                         sample_width=2,
                         channels=1,
                         frames_per_buffer=128,
                         record=False,
                         block_dur=0.01)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                 energy_threshold=50)
tokenizer = StreamTokenizer(validator=validator,
                            min_length=100,
                            # max_length is a required argument; 500 windows (5 s) is an assumed value
                            max_length=500,
                            max_continuous_silence=500)

asource.open()

tokenizer.tokenize(asource, callback=echo)
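Both tokenize() calls above pass an echo callback that the snippet never defines; a minimal sketch consistent with auditok's callback signature (data is a list of raw frames, start and end are window indices) could be:

def echo(data, start, end):
    # report each detected token; playback via player_for(asource), as in the
    # commented-out variant above, could be added here
    print('token from window {0} to {1}'.format(start, end))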
Example #12
    # set up audio source
    asource = ADSFactory.ads(record=True,
                             max_time=min_length,
                             sampling_rate=sample_rate)

    # On macOS and Windows take the sample rate reported by the source;
    # on Linux (e.g. Raspberry Pi) keep the configured 48000 from above
    _os = platform.system()
    if (_os == 'Darwin') or (_os == 'Windows'):
        sample_rate = asource.get_sampling_rate()

    # get sample width and channels from ads factory
    sample_width = asource.get_sample_width()
    channels = asource.get_channels()

    # START VALIDATOR
    validator = AudioEnergyValidator(sample_width=sample_width,
                                     energy_threshold=energy_threshold)
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=min_length,
        max_length=max_length,
        max_continuous_silence=max_continuous_silence)

    # LOAD PYAUDIO
    p = pyaudio.PyAudio()

    # start classe memoria
    _memoria = memoria.Memoria()

    # gui vars
    if GUI:
        root = Tk()
Example #13
def find_voice_segments(audio_file, music_time_list):
    segments = []
    formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}
    #[Fs_cr, x_cr] = aIO.readAudioFile(input_audio_audio_file)
    #[Fs_ce, x_ce] = aIO.readAudioFile(callee_audio_file)
    #segments = aS.silenceRemoval(x_cr, Fs_cr, 0.010, 0.010, smoothWindow=3,Weight=0.3,plot=False)
    #print(segments)
    #callee_segments = aS.silenceRemoval(x_ce, Fs_ce, 0.010, 0.010, smoothWindow=5,Weight=0.3,plot=False)
    #print(callee_segments)

    test_source = ADSFactory.ads(filename=audio_file, record=False)
    test_source.open()
    max_value = 0.0
    energies = []  # plain list; numpy.empty([]) would seed the statistics with a garbage value
    while True:
        frame = test_source.read()

        if frame is None:
            break

        signal = numpy.array(numpy.frombuffer(
            frame, dtype=formats[test_source.get_sample_width()]),
                             dtype=numpy.float64)
        energy = float(numpy.dot(signal, signal)) / len(signal)
        max_value = max(max_value, energy)
        energies.append(energy)

    # place the threshold 40% of the way from the mean to the peak log energy
    log_max = 10. * numpy.log10(max_value)
    log_mean = 10. * numpy.log10(numpy.mean(energies))
    threshold = log_mean + 0.4 * (log_max - log_mean)

    test_source.close()
    asource = ADSFactory.ads(filename=audio_file, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=threshold)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=300,
                                max_length=99999999,
                                max_continuous_silence=300)
    player = player_for(asource)

    asource.open()
    tokens = tokenizer.tokenize(asource)
    for i, t in enumerate(tokens):
        segment_begin = t[1] * 10
        segment_end = t[2] * 10

        if len(music_time_list) > 0:
            # iterate over a copy: removing items while iterating a list skips elements
            for item in list(music_time_list):
                # if the segment runs past the start of a music region
                if segment_end > item[0]:
                    # keep the part of the segment before the music
                    segments.append([segment_begin, item[0]])
                    # resume the segment after the music ends
                    segment_begin = item[1]
                    # remove the handled music region so it is not reused
                    music_time_list.remove(item)

        segments.append([segment_begin, segment_end])

    asource.close()
    return segments
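The adaptive threshold above sits 40% of the way from the mean log energy to the peak log energy. A quick numeric check with made-up per-window energies:

import numpy

energies = [1e4, 1e6, 1e8]                          # hypothetical energies
log_max = 10. * numpy.log10(max(energies))          # 80.0 dB
log_mean = 10. * numpy.log10(numpy.mean(energies))  # ~75.3 dB
threshold = log_mean + 0.4 * (log_max - log_mean)
print(round(threshold, 1))                          # 77.2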
Example #14
    def extractEvents(path, patientID):

        yname = os.path.basename(path)
        yname = yname[:len(yname) - 4]

        dest_path = '/home/pi/recordings/' + patientID + '/' + yname + '/'
        if not os.path.exists(dest_path):
            os.makedirs(dest_path)

        fsoriginal, y = wavfile.read(path)  # read audio file
        try:
            r, c = np.shape(y)
            if c > 1:
                y = np.delete(y, 1, axis=1)  # keep only the first channel
        except ValueError:
            pass  # mono file: np.shape(y) has a single dimension

        wavfile.write('/home/pi/coughanalysis_ann/sample.wav',
                      data=y,
                      rate=44100)

        asource = ADSFactory.ads(
            filename='/home/pi/coughanalysis_ann/sample.wav', record=True)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(), energy_threshold=65)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=10 : minimum length of a valid audio activity is 10 * 10 == 100 ms
        # max_length=1000 : maximum length of a valid audio activity is 1000 * 10 == 10000 ms == 10 seconds
        # max_continuous_silence=40 : maximum tolerated silence within an activity is 40 * 10 == 400 ms

        # For a sampling rate of 16 kHz (16000 samples per second), 10 ms spans 160 samples.

        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=10,
                                    max_length=1000,
                                    max_continuous_silence=40)

        asource.open()
        tokens = tokenizer.tokenize(asource)

        # Play detected regions back

        # player = player_for(asource)

        # Rewind and read the whole signal
        asource.rewind()
        original_signal = []

        while True:
            w = asource.read()
            if w is None:
                break
            original_signal.append(w)

        original_signal = b''.join(original_signal)  # frames are bytes in Python 3

        # print("Playing the original file...")
        # player.play(original_signal)

        # print("playing detected regions...")
        count = 0
        for t in tokens:
            # print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
            data = b''.join(t[0])  # frames are bytes in Python 3
            # player.play(data)

            fp = wave.open(dest_path + yname + str(count) + '.wav', "w")
            fp.setnchannels(asource.get_channels())
            fp.setsampwidth(asource.get_sample_width())
            fp.setframerate(asource.get_sampling_rate())
            fp.writeframes(data)
            fp.close()
            count += 1

        return dest_path
Example #15
try:
    r, c = numpy.shape(y)
    if c > 1:
        y = numpy.delete(y, 1, axis=1)  # keep only the first channel
except ValueError:
    pass  # mono file: numpy.shape(y) has a single dimension

wavfile.write('sample.wav', data=y, rate=44100)

asource = ADSFactory.ads(
    filename="/home/baswarajmamidgi/salcit/coughanalysis_ann/sample.wav",
    record=True)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                 energy_threshold=65)

# Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
# min_length=10 : minimum length of a valid audio activity is 10 * 10 == 100 ms
# max_length=1000 : maximum length of a valid audio activity is 1000 * 10 == 10000 ms == 10 seconds
# max_continuous_silence=40 : maximum tolerated silence within an activity is 40 * 10 == 400 ms

# For a sampling rate of 16 kHz (16000 samples per second), 10 ms spans 160 samples.

tokenizer = StreamTokenizer(validator=validator,
                            min_length=10,
                            max_length=1000,
                            max_continuous_silence=40)

asource.open()
tokens = tokenizer.tokenize(asource)
Example #16
    def runAuditok(self):
        '''
        This method captures sound from the audio source specified in
        self.audioPath; if self.audioPath is None, the built-in microphone
        is used.
        '''

        #a = raw_input("waiting for start")
        if self.audioPath == None:
            self.asource = ADSFactory.ads(sampling_rate=self.sr)
        else:
            self.asource = ADSFactory.ads(filename=self.audioPath,
                                          sampling_rate=self.sr)

        self.validator = AudioEnergyValidator(
            sample_width=self.asource.get_sample_width(),
            energy_threshold=self.energy)

        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=self.min_len,
            max_length=self.max_len,
            max_continuous_silence=self.max_con_si)

        self.player = player_for(self.asource)

        self.prev_data = np.zeros([1])

        def audio_callback(data, start, end):

            if not np.array_equal(data, self.prev_data):
                self.sendTrigger()  # send notice that audio has been detected

                print("Acoustic activity at: {0}--{1}".format(start, end))

                stamp = (start, end, self.chunk_count)

                if self.record:
                    self.saveAudio(data)

                copied = []
                for x in data:
                    np_data = np.frombuffer(x, dtype=np.uint8)
                    copied.append(np_data)

                data_rs = self.reshapeAudio(np.asarray(copied))

                self.sendAudio(data_rs, stamp)

                self.prev_data = data
                if self.PLAYBACK:
                    print "playing audio"
                    self.playback(data_rs)

                self.chunk_count += 1

        self.asource.open()
        self.sendTrigger()  # send notice that the audio has started to be processed
        self.tokenizer.tokenize(self.asource, callback=audio_callback)
        sys.exit(0)