示例#1
0
 def replayAudio(self):
     """Loop forever, echoing the buffered audio whenever it is requested.

     A player is created for ``self.audio_source`` and stored on the
     instance. Each pass of the loop checks ``self.trigger_echo``; when it
     is set, ``self.bdata`` is played and the flag is cleared. The loop
     sleeps two seconds between checks and never returns.
     """
     self.player = player_for(self.audio_source)
     poll_interval = 2  # seconds to wait between checks of the flag
     while True:
         echo_requested = self.trigger_echo
         if echo_requested:
             self.player.play(self.bdata)
             # Clear the request so the same buffer is not replayed.
             self.trigger_echo = False
         time.sleep(poll_interval)
    def __init__(self):
        """Create the audio pipeline objects and load the compiled CNN.

        Sets ``asource``, ``validator``, ``tokenizer`` and ``player`` on the
        instance, then loads the model through ``self.load_cnn`` from the
        JSON/weights files and compiles it.
        """
        # Audio source created with recording enabled and max_time=4.
        self.asource = ADSFactory.ads(max_time=4, record=True)

        # Energy validator shared with the tokenizer below.
        energy_check = AudioEnergyValidator(energy_threshold=50,
                                            sample_width=2)
        self.validator = energy_check
        self.tokenizer = StreamTokenizer(
            validator=energy_check,
            min_length=20,
            max_length=1000,
            max_continuous_silence=30,
        )
        self.player = player_for(self.asource)

        # Model description and weights live in separate files.
        architecture_path = '../model/final_cnn_model.json'
        weights_path = '../model/weights_final_cnn.h5'
        self.model = self.load_cnn(architecture_path, weights_path)
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            metrics=['accuracy'],
        )
示例#3
0
    def configure(self, rf):
        """Configure the base module and create the audio player.

        Args:
            rf: Configuration object; its optional ``sampleRate`` entry is
                read through ``rf.check`` / ``rf.find(...).asInt()``.

        Returns:
            Whatever ``BaseModule.configure`` returned for ``rf``.
        """
        result = BaseModule.configure(self, rf)

        # Use the configured sample rate, falling back to 16000 when the
        # "sampleRate" entry is absent.
        sr = rf.find("sampleRate").asInt() if rf.check("sampleRate") else 16000

        # The player is built from an audio source created at that rate.
        # If the audio to be played back was captured with other
        # non-default settings, pass them to ADSFactory.ads here as well.
        self.player = player_for(ADSFactory.ads(sampling_rate=sr))

        return result
                                     energy_threshold=65)

    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
    # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
    # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=20,
                                max_length=400,
                                max_continuous_silence=30)

    asource.open()
    tokens = tokenizer.tokenize(asource)

    # Play detected regions back
    player = player_for(asource)

    # Rewind and read the whole signal
    asource.rewind()
    original_signal = []

    while True:
        w = asource.read()
        if w is None:
            break
        original_signal.append(w)

    original_signal = b''.join(original_signal)
    player.play(original_signal)

    print("\n ** playing detected regions...\n")
   # We set the `record` argument to True so that we can rewind the source
   asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)

   validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)

   # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
   # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
   # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
   # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms
   tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)

   asource.open()
   tokens = tokenizer.tokenize(asource)

   # Play detected regions back
   player = player_for(asource)

   # Rewind and read the whole signal
   asource.rewind()
   original_signal = []

   while True:
      w = asource.read()
      if w is None:
         break
      original_signal.append(w)
     

   original_signal = b''.join(original_signal)
   player.play(original_signal)
示例#6
0
def find_voice_segments(audio_file, music_time_list):
    """Detect audio-activity segments in a file, cutting them at music spans.

    The file is read twice. The first pass computes the mean-square energy
    of every frame and derives a log-energy threshold located 40% of the
    way from the mean energy to the maximum energy. The second pass
    tokenizes the file with that threshold.

    Args:
        audio_file: Path of the audio file, passed to ``ADSFactory.ads``.
        music_time_list: List of ``[begin, end]`` music spans, expected in
            the same unit as the returned segments. Entries that are
            matched against a detected segment are removed from this list
            IN PLACE so they are not used again for later segments.

    Returns:
        A list of ``[begin, end]`` pairs. Values are token frame indices
        multiplied by 10 (presumably milliseconds for a 10 ms analysis
        window -- TODO confirm against the audio source settings). The
        list is empty when the file has no frames or no signal energy.

    Raises:
        KeyError: If the sample width of the file is not 1, 2 or 4 bytes.
    """
    segments = []
    formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}

    # First pass: mean-square energy of each frame. A plain list is used
    # because numpy.empty([]) yields a 0-d *uninitialised* array whose
    # garbage value would otherwise end up in the mean below.
    energies = []
    test_source = ADSFactory.ads(filename=audio_file, record=False)
    test_source.open()
    try:
        while True:
            frame = test_source.read()
            if frame is None:
                break
            signal = numpy.frombuffer(
                frame,
                dtype=formats[test_source.get_sample_width()]).astype(
                    numpy.float64)
            energies.append(float(numpy.dot(signal, signal)) / len(signal))
    finally:
        test_source.close()

    if not energies:
        return segments
    max_value = max(energies)
    if max_value <= 0.0:
        # Pure digital silence: log10(0) would give a -inf/NaN threshold
        # and nothing could be detected anyway.
        return segments

    log_max = 10. * numpy.log10(max_value)
    log_mean = 10. * numpy.log10(numpy.mean(energies))
    threshold = log_mean + 0.4 * (log_max - log_mean)

    # Second pass: tokenize the file with the derived threshold.
    asource = ADSFactory.ads(filename=audio_file, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=threshold)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=300,
                                max_length=99999999,
                                max_continuous_silence=300)

    asource.open()
    try:
        for token in tokenizer.tokenize(asource):
            segment_begin = token[1] * 10
            segment_end = token[2] * 10

            # Iterate over a snapshot: removing from the list that is being
            # iterated would silently skip the entry after each removal.
            for item in list(music_time_list):
                # The segment end lies past the beginning of this music span.
                if segment_end > item[0]:
                    # Keep the part of the segment before the music ...
                    segments.append([segment_begin, item[0]])
                    # ... and resume the segment after the music.
                    segment_begin = item[1]
                    # Consume the span so it is not used for later segments.
                    music_time_list.remove(item)

            segments.append([segment_begin, segment_end])
    finally:
        asource.close()

    return segments
示例#7
0
    def runAuditok(self):
        '''
        Capture audio, detect acoustic activity and forward every detection.

        The audio source is the file at self.audioPath; when self.audioPath
        is None the default ADSFactory source is used instead (the built-in
        microphone, according to the original documentation). The source is
        tokenized with an energy validator configured from self.energy,
        self.min_len, self.max_len and self.max_con_si.

        For each detected region the callback sends a trigger, optionally
        saves the audio (self.record), sends the reshaped audio with a
        (start, end, chunk_count) stamp and optionally plays it back
        (self.PLAYBACK).

        This method does not return: it calls sys.exit(0) once tokenizing
        has finished.
        '''
        if self.audioPath is None:
            self.asource = ADSFactory.ads(sampling_rate=self.sr)
        else:
            self.asource = ADSFactory.ads(filename=self.audioPath,
                                          sampling_rate=self.sr)

        self.validator = AudioEnergyValidator(
            sample_width=self.asource.get_sample_width(),
            energy_threshold=self.energy)

        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=self.min_len,
            max_length=self.max_len,
            max_continuous_silence=self.max_con_si)

        self.player = player_for(self.asource)

        # Last detection handled; used to ignore an identical repeat.
        self.prev_data = np.zeros([1])

        def audio_callback(data, start, end):
            '''Handle one detected region: data is a sequence of byte buffers.'''
            # Ignore a detection whose content equals the previous one.
            if np.array_equal(data, self.prev_data):
                return

            self.sendTrigger()  # send notice that audio has been detected

            print("Acoustic activity at: {0}--{1}".format(start, end))

            stamp = (start, end, self.chunk_count)

            if self.record:
                self.saveAudio(data)

            # View every buffer of the detection as raw unsigned bytes.
            copied = [np.frombuffer(x, dtype=np.uint8) for x in data]
            data_rs = self.reshapeAudio(np.asarray(copied))

            self.sendAudio(data_rs, stamp)

            self.prev_data = data
            if self.PLAYBACK:
                # Call form prints the same text on Python 2 and Python 3.
                print("playing audio")
                self.playback(data_rs)

            self.chunk_count += 1

        self.asource.open()
        # send notice that the audio has started to be processed
        self.sendTrigger()
        self.tokenizer.tokenize(self.asource, callback=audio_callback)
        sys.exit(0)