def test_init_min_3_init_max_silence_0(self):
     """init_min=3 with init_max_silence=0: a token may only start once
     3 consecutive valid frames are seen, with no leading silence."""
     tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
                                  max_continuous_silence=4, init_min = 3,
                                  init_max_silence = 0, mode=0)

     data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA")
     #                                                 ^           ^  ^   ^
     #                                                 18          30 33  37

     tokens = tokenizer.tokenize(data_source)

     self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
     tok1, tok2 = tokens[0], tokens[1]

     # tok[0]: frames, tok[1]: start index, tok[2]: end index (inclusive)
     data = ''.join(tok1[0])
     start = tok1[1]
     end = tok1[2]
     self.assertEqual(data, "AAAAAAAAAaaaa",
                     msg="wrong data for token 1, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(data))
     self.assertEqual(start, 18, msg="wrong start frame for token 1, expected: 18, found: {0} ".format(start))
     self.assertEqual(end, 30, msg="wrong end frame for token 1, expected: 30, found: {0} ".format(end))

     data = ''.join(tok2[0])
     start = tok2[1]
     end = tok2[2]
     # Fixed: the failure message previously said "token 1" for token 2.
     self.assertEqual(data, "AAAAA",
                     msg="wrong data for token 2, expected: 'AAAAA', found: '{0}' ".format(data))
     self.assertEqual(start, 33, msg="wrong start frame for token 2, expected: 33, found: {0} ".format(start))
     self.assertEqual(end, 37, msg="wrong end frame for token 2, expected: 37, found: {0} ".format(end))
 def test_init_min_0_init_max_silence_0(self):
     """init_min=0: a token starts at the first valid frame, so the early
     scattered 'A's are captured into one long token."""
     tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
                                  max_continuous_silence=4, init_min = 0,
                                  init_max_silence = 0, mode=0)

     data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaaAAAAAAAA")
     #                                ^              ^   ^      ^
     #                                2              16  20     27
     tokens = tokenizer.tokenize(data_source)

     self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
     tok1, tok2 = tokens[0], tokens[1]

     # tok1[0]: data
     # tok1[1]: start frame (included)
     # tok1[2]: end frame (included)

     data = ''.join(tok1[0])
     start = tok1[1]
     end = tok1[2]
     self.assertEqual(data, "AaaaAaAaaAaAaaaa",
                     msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: {0} ".format(data))
     self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
     self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))

     data = ''.join(tok2[0])
     start = tok2[1]
     end = tok2[2]
     # Fixed: the failure message previously said "token 1" for token 2.
     self.assertEqual(data, "AAAAAAAA",
                     msg="wrong data for token 2, expected: 'AAAAAAAA', found: {0} ".format(data))
     self.assertEqual(start, 20, msg="wrong start frame for token 2, expected: 20, found: {0} ".format(start))
     self.assertEqual(end, 27, msg="wrong end frame for token 2, expected: 27, found: {0} ".format(end))
def split(filename='g1238-20181214-081712-1544750232.37681.wav'):
    """Tokenize *filename* into voiced segments and write each to token_<i>.wav.

    Returns a list of [start_sample, end_sample] pairs.
    NOTE(review): the factor 80 assumes 10 ms analysis windows at 8 kHz
    (80 samples per window) -- confirm against the source's actual rate.
    """
    sr, samples = wavfile.read(filename=filename, mmap=True)
    plt.plot(samples)
    asource = ADSFactory.ads(filename=filename, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)
    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
    # min_length=100 : minimum valid activity is 100 * 10 == 1000 ms
    # max_length=500 : maximum valid activity is 500 * 10 == 5000 ms
    # max_continuous_silence=50 : tolerate up to 50 * 10 == 500 ms of silence inside a token
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=100,
                                max_length=500,
                                max_continuous_silence=50)
    asource.open()
    tokens = tokenizer.tokenize(asource)
    stack = []
    durations = []  # renamed from `sum`, which shadowed the builtin
    for i, t in enumerate(tokens):
        start_sample, end_sample = t[1] * 80, t[2] * 80
        stack.append([start_sample, end_sample])
        durations.append((end_sample - start_sample) / 8000)
        wavfile.write('token_' + str(i) + '.wav', sr,
                      samples[start_sample:end_sample])  #write to file
    asource.close()
    print(durations)
    return stack
Example #4
0
    def test_callback(self):
        """tokenize() with a callback must invoke it once per detected
        token instead of returning a list."""
        tokens = []

        def callback(data, start, end):
            tokens.append((data, start, end))

        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource("aaAAAAAAAAAAAAa")
        #                                 ^      ^^   ^
        #                                 2      910  14

        tokenizer.tokenize(data_source, callback=callback)

        # Fixed: the failure message previously said "expected: 1" while
        # the assertion expects 2 tokens.
        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)),
        )
    def transcribe_audio(self, stereo_path, channels_to_process):
        """Split *stereo_path* into mono legs, tokenize the requested
        channels ('A', 'B' or 'AB') and hand the end-time-ordered
        segments to self.batch()."""
        if not os.path.isfile(stereo_path):
            raise Exception("Audio file does not exist.")

        data = self.split_to_mono(stereo_path)
        a_leg, b_leg = data['a_leg'], data['b_leg']
        data['a_leg'] = None
        data['b_leg'] = None

        validator = AudioEnergyValidator(sample_width=data['frame_width'], energy_threshold=45)
        trimmer = StreamTokenizer(validator,
                                  min_length=self.min_segment_length,
                                  max_length=self.max_segment_length,
                                  max_continuous_silence=self.max_continuous_silence,
                                  mode=StreamTokenizer.DROP_TAILING_SILENCE)

        segments = []
        block_size = data['frame_rate'] / self.divisor

        def _tokenize_leg(label, leg):
            # Collect (label, frames, start, end) tuples for one channel.
            source = ADSFactory.ads(audio_source=leg, record=True, block_size=block_size)
            source.open()
            trimmer.tokenize(source, callback=lambda d, s, e: segments.append((label, d, s, e)))

        if channels_to_process in ['A', 'AB']:
            _tokenize_leg("A", a_leg)
        if channels_to_process in ['B', 'AB']:
            _tokenize_leg("B", b_leg)

        # Order all collected segments by their end frame before batching.
        segments.sort(key=lambda seg: seg[3])
        self.batch(segments, data['duration'], data['frame_rate'], data['frame_width'], data['nchannels'])
Example #6
0
def getSplitAudioDurationListBetweenSilence(fileName,eachAudioLen,silencePeriod,energyThreshold=55):
    """Tokenize *fileName* into audio activities separated by silence.

    eachAudioLen and silencePeriod are in seconds and are converted to
    10 ms analysis frames (x100). Returns the list of detected tokens;
    an empty list if tokenization did not run or failed.
    """
    tokens = []   # fixed: previously unbound when an exception fired before tokenize()
    asource = None
    try:
        asource = ADSFactory.ads(filename=fileName, record=False)

        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energyThreshold)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=400 : minimum valid activity is 400 * 10 == 4000 ms
        # max_length / max_continuous_silence are given in seconds, hence the x100
        tokenizer = StreamTokenizer(validator=validator, min_length=400, max_length=eachAudioLen*100, max_continuous_silence=silencePeriod*100)

        asource.open()
        tokens = tokenizer.tokenize(asource)
    except KeyboardInterrupt:
        # User abort: fall through and return whatever was collected.
        pass
    except Exception as e:
        sys.stderr.write(str(e) + "\n")
    finally:
        # Fixed: close exactly once, and only if the source was created --
        # the old handler could NameError on `asource` and leaked it on success.
        if asource is not None:
            try:
                asource.close()
            except Exception:
                pass
    return tokens
 def test_min_length_1_init_max_length_1(self):
     """With min_length == max_length == 1 and no tolerated silence,
     every valid frame must become its own token."""
     tokenizer = StreamTokenizer(self.A_validator, min_length = 1, max_length=1,
                                  max_continuous_silence=0, init_min = 0,
                                  init_max_silence = 0, mode=0)

     source = StringDataSource("AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")

     found = tokenizer.tokenize(source)
     count = len(found)
     self.assertEqual(count, 21, msg="wrong number of tokens, expected: 21, found: {0} ".format(count))
 def test_min_length_4_init_max_length_5(self):
     """max_length=5 forces long activities to be chopped into
     consecutive 5-frame tokens."""
     tokenizer = StreamTokenizer(self.A_validator, min_length = 4, max_length=5,
                                  max_continuous_silence=4, init_min = 3,
                                  init_max_silence = 3, mode=0)

     data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa")
     #                                                 ^   ^^   ^    ^   ^     ^   ^
     #                                                 18 2223  27   32  36    42  46

     tokens = tokenizer.tokenize(data_source)

     self.assertEqual(len(tokens), 4, msg="wrong number of tokens, expected: 4, found: {0} ".format(len(tokens)))
     tok1, tok2, tok3, tok4 = tokens[0], tokens[1], tokens[2], tokens[3]

     # Fixed below: failure messages previously carried wrong token
     # numbers and stale expected values copied from other tests.
     data = ''.join(tok1[0])
     start = tok1[1]
     end = tok1[2]
     self.assertEqual(data, "AAAAA",
                     msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
     self.assertEqual(start, 18, msg="wrong start frame for token 1, expected: 18, found: {0} ".format(start))
     self.assertEqual(end, 22, msg="wrong end frame for token 1, expected: 22, found: {0} ".format(end))

     data = ''.join(tok2[0])
     start = tok2[1]
     end = tok2[2]
     self.assertEqual(data, "AAAaa",
                     msg="wrong data for token 2, expected: 'AAAaa', found: '{0}' ".format(data))
     self.assertEqual(start, 23, msg="wrong start frame for token 2, expected: 23, found: {0} ".format(start))
     self.assertEqual(end, 27, msg="wrong end frame for token 2, expected: 27, found: {0} ".format(end))

     data = ''.join(tok3[0])
     start = tok3[1]
     end = tok3[2]
     self.assertEqual(data, "AAAAA",
                     msg="wrong data for token 3, expected: 'AAAAA', found: '{0}' ".format(data))
     self.assertEqual(start, 32, msg="wrong start frame for token 3, expected: 32, found: {0} ".format(start))
     self.assertEqual(end, 36, msg="wrong end frame for token 3, expected: 36, found: {0} ".format(end))

     data = ''.join(tok4[0])
     start = tok4[1]
     end = tok4[2]
     self.assertEqual(data, "AAaaA",
                     msg="wrong data for token 4, expected: 'AAaaA', found: '{0}' ".format(data))
     self.assertEqual(start, 42, msg="wrong start frame for token 4, expected: 42, found: {0} ".format(start))
     self.assertEqual(end, 46, msg="wrong end frame for token 4, expected: 46, found: {0} ".format(end))
Example #9
0
def read_split_dir(file):
    """Split a wav file longer than 4 s into energy-tokenized clips.

    Files of 4 s or less are left untouched. Longer files are converted
    to a temporary mono copy, tokenized, exported as <name>_clip<i>.wav,
    and both the temporary copy and the original file are removed.
    """
    f = sf.SoundFile(file)
    #duration of file in seconds
    duration = len(f) / f.samplerate

    if duration <= 4:
        print(file, 'untouched')
    else:
        #Get original filename
        name = os.path.splitext(file)[0]

        # Make a temporary mono copy for tokenization.
        # NOTE(review): '0wavtmp_' + file breaks if `file` contains a
        # directory component -- confirm callers pass bare filenames.
        tempsound = AudioSegment.from_wav(file)
        tempsound = tempsound.set_channels(1)
        tempsound.export('0wavtmp_' + file, format="wav")
        tmpfile = '0wavtmp_' + file

        # We set the `record` argument to True so that we can rewind the source
        asource = ADSFactory.ads(filename=tmpfile, record=True)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(), energy_threshold=50)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=500 : minimum length of a valid audio activity is 500 windows
        # max_length=4000 : maximum length of a valid audio activity is 4000 windows
        # max_continuous_silence=100 : maximum tolerated silence inside an activity is 100 windows
        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=500,
                                    max_length=4000,
                                    max_continuous_silence=100)

        asource.open()
        tokens = tokenizer.tokenize(asource)

        for index, t in enumerate(tokens):
            # NOTE(review): t[1]/t[2] are analysis-window indices, but
            # AudioSegment slicing below is in milliseconds -- these only
            # agree if one window == 1 ms; confirm the intended block size.
            newAudio = AudioSegment.from_wav(file)
            newAudio = newAudio[t[1]:t[2]]

            chunk_name = "{}_clip{}.wav".format(name, index)
            print("Generating", chunk_name)
            newAudio.export(
                chunk_name,
                format="wav")  #Exports to a wav file in the current path.

        #Remove the temporary file we made earlier
        os.remove(tmpfile)
        #Remove the original file to avoid confusion
        os.remove(file)
    def __init__(self):
        # Record up to 4 s of audio from the default source.
        self.asource = ADSFactory.ads(record=True, max_time=4)
        # Frames are "valid" (speech-like) when energy exceeds the threshold;
        # sample_width=2 assumes 16-bit audio -- TODO confirm against source.
        self.validator = AudioEnergyValidator(sample_width=2,
                                              energy_threshold=50)
        # Lengths are in analysis windows (10 ms by default in auditok).
        self.tokenizer = StreamTokenizer(validator=self.validator,
                                         min_length=20,
                                         max_length=1000,
                                         max_continuous_silence=30)
        self.player = player_for(self.asource)

        # Pre-trained CNN classifier loaded from disk, compiled for use.
        self.model = self.load_cnn('../model/final_cnn_model.json',
                                   '../model/weights_final_cnn.h5')
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='sgd',
                           metrics=['accuracy'])
Example #11
0
    def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self):
        """Combining STRICT_MIN_LENGTH with DROP_TRAILING_SILENCE must
        yield a single trimmed token."""
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=StreamTokenizer.STRICT_MIN_LENGTH
            | StreamTokenizer.DROP_TRAILING_SILENCE,
        )

        data_source = StringDataSource("aaAAAAAAAAAAAAaa")
        #                                 ^      ^
        #                                 2      8

        tokens = tokenizer.tokenize(data_source)

        count = len(tokens)
        self.assertEqual(
            count,
            1,
            msg="wrong number of tokens, expected: 1, found: {0} ".format(count),
        )

        tok = tokens[0]
        data = "".join(tok[0])
        start, end = tok[1], tok[2]

        self.assertEqual(
            data,
            "AAAAAAAA",
            msg=("wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' "
                 ).format(data),
        )
        self.assertEqual(
            start,
            2,
            msg=("wrong start frame for token 1, expected: 2, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            9,
            msg=("wrong end frame for token 1, expected: 9, found: {0} "
                 ).format(end),
        )
Example #12
0
def make_auditok_detector(sample_rate=100):
    """Build an auditok-based speech detector.

    *sample_rate* is the detector frame rate in analysis windows per
    second (default 100 -> 10 ms windows at FRAME_RATE audio samples/s).
    Returns a callable mapping a raw audio byte string to a boolean
    numpy array with one entry per analysis window.
    """
    bytes_per_frame = 2  # 16-bit mono samples
    frames_per_window = FRAME_RATE // sample_rate
    validator = AudioEnergyValidator(sample_width=bytes_per_frame,
                                     energy_threshold=50)
    # Lengths are in analysis windows: >= 0.2 s of activity, <= 5 s,
    # tolerating up to 0.25 s of internal silence.
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=0.2 * sample_rate,
                                max_length=int(5 * sample_rate),
                                max_continuous_silence=0.25 * sample_rate)

    def _detect(asegment):
        # Wrap the raw bytes in a source read one analysis window at a time.
        asource = BufferAudioSource(data_buffer=asegment,
                                    sampling_rate=FRAME_RATE,
                                    sample_width=bytes_per_frame,
                                    channels=1)
        ads = ADSFactory.ads(audio_source=asource, block_dur=1. / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)
        # Number of whole analysis windows covering the segment (ceiling).
        length = (len(asegment) // bytes_per_frame + frames_per_window -
                  1) // frames_per_window
        # Mark each token span with +1/-1 deltas, then prefix-sum into a
        # per-window speech mask.
        media_bstring = np.zeros(length + 1, dtype=int)
        for token in tokens:
            media_bstring[token[1]] += 1
            media_bstring[token[2] + 1] -= 1
        return np.cumsum(media_bstring)[:-1] > 0

    return _detect
 def test_callback(self):
     """A user callback must be called once per detected token."""
     tokens = []

     def callback(data, start, end):
         tokens.append((data, start, end))

     tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
                                 max_continuous_silence=3, init_min = 3,
                                 init_max_silence = 3, mode=0)

     data_source = StringDataSource("aaAAAAAAAAAAAAa")
     #                                 ^      ^^   ^
     #                                 2      910  14

     tokenizer.tokenize(data_source, callback=callback)

     # Fixed: message previously said "expected: 1" while asserting 2.
     self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
    def test_min_5_max_10_max_continuous_silence_1(self):
        """max_continuous_silence=1: a single silent frame is kept inside
        a token, a second one terminates it."""
        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
                                    max_continuous_silence=1, init_min = 3,
                                    init_max_silence = 3, mode=0)

        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
        #                                  ^        ^^ ^ ^        ^
        #                                  3       12131517      26
        #                                         (12 13 15 17)

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
        tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]

        # Fixed below: failure messages previously carried wrong token
        # numbers and stale expected values.
        data = ''.join(tok1[0])
        start = tok1[1]
        end = tok1[2]
        self.assertEqual(data, "AAAAAaAAAA",
                        msg="wrong data for token 1, expected: 'AAAAAaAAAA', found: '{0}' ".format(data))
        self.assertEqual(start, 3, msg="wrong start frame for token 1, expected: 3, found: {0} ".format(start))
        self.assertEqual(end, 12, msg="wrong end frame for token 1, expected: 12, found: {0} ".format(end))

        data = ''.join(tok2[0])
        start = tok2[1]
        end = tok2[2]
        self.assertEqual(data, "AAa",
                        msg="wrong data for token 2, expected: 'AAa', found: '{0}' ".format(data))
        self.assertEqual(start, 13, msg="wrong start frame for token 2, expected: 13, found: {0} ".format(start))
        self.assertEqual(end, 15, msg="wrong end frame for token 2, expected: 15, found: {0} ".format(end))

        data = ''.join(tok3[0])
        start = tok3[1]
        end = tok3[2]
        self.assertEqual(data, "AAAAAAAAAa",
                        msg="wrong data for token 3, expected: 'AAAAAAAAAa', found: '{0}' ".format(data))
        self.assertEqual(start, 17, msg="wrong start frame for token 3, expected: 17, found: {0} ".format(start))
        self.assertEqual(end, 26, msg="wrong end frame for token 3, expected: 26, found: {0} ".format(end))
Example #15
0
    def calibrate(self):
        '''
        Calibrate the MinMaxScaler, self.scaler, by capturing up to 10
        seconds of audio and fitting the scaler on every detected token.
        See sklearn.preprocessing.MinMaxScaler for details.

        NOTE: this module is Python 2 code (print statements, raw_input).

        This is redundant, scaling is not necessary.
        '''
        a = raw_input(
            "Calibrate normalisation, press return then make noises from your mouth hole."
        )
        # Use the live microphone when no audio file path was given.
        if self.audioPath == None:
            asource = ADSFactory.ads(sampling_rate=self.sr, max_time=10)
        else:
            asource = ADSFactory.ads(filename=self.audioPath,
                                     sampling_rate=self.sr,
                                     max_time=10)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(),
            energy_threshold=self.energy)

        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=self.min_len,
                                    max_length=self.max_len,
                                    max_continuous_silence=self.max_con_si)

        def calib_callback(data, start, end):
            # Fit the scaler on each detected token, one sample per row.
            # NOTE(review): np.int8 assumes 8-bit samples -- confirm this
            # matches asource.get_sample_width().
            audio = np.fromstring(data[0], dtype=np.int8)
            self.scaler.fit_transform(np.swapaxes(np.asarray([audio]), 0, 1))
            print "Audio sample found {0}--{1}".format(start, end)

        asource.open()

        tokenizer.tokenize(asource, callback=calib_callback)
        print "Scaler paramaters found: min: {0} max: {1}".format(
            self.scaler.data_min_, self.scaler.data_max_)

        print "calibration done"
        self.mini = self.scaler.data_min_
        self.maxi = self.scaler.data_max_
Example #16
0
    def test_min_length_1_init_max_length_1(self):
        """Each valid frame must form its own token when min and max
        length are both 1 and no silence is tolerated."""
        tokenizer = StreamTokenizer(self.A_validator,
                                    min_length=1,
                                    max_length=1,
                                    max_continuous_silence=0,
                                    init_min=0,
                                    init_max_silence=0,
                                    mode=0)

        source = StringDataSource(
            "AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")

        found = tokenizer.tokenize(source)
        count = len(found)

        self.assertEqual(
            count,
            21,
            msg="wrong number of tokens, expected: 21, found: {0} ".format(
                count))
 def test_init_min_3_init_max_silence_2(self):
     """init_min=3 with init_max_silence=2: a token starts after 3 valid
     frames, tolerating up to 2 leading silent frames."""
     tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
                                  max_continuous_silence=4, init_min = 3,
                                  init_max_silence = 2, mode=0)

     data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA")
     #                                    ^          ^  ^           ^   ^   ^
     #                                    5          16 19          31  35  39
     tokens = tokenizer.tokenize(data_source)

     self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
     tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]

     data = ''.join(tok1[0])
     start = tok1[1]
     end = tok1[2]
     # Fixed: the failure message previously quoted a truncated expected string.
     self.assertEqual(data, "AaAaaAaAaaaa",
                     msg="wrong data for token 1, expected: 'AaAaaAaAaaaa', found: '{0}' ".format(data))
     self.assertEqual(start, 5, msg="wrong start frame for token 1, expected: 5, found: {0} ".format(start))
     self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))

     data = ''.join(tok2[0])
     start = tok2[1]
     end = tok2[2]
     self.assertEqual(data, "AAAAAAAAAaaaa",
                     msg="wrong data for token 2, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(data))
     self.assertEqual(start, 19, msg="wrong start frame for token 2, expected: 19, found: {0} ".format(start))
     self.assertEqual(end, 31, msg="wrong end frame for token 2, expected: 31, found: {0} ".format(end))

     data = ''.join(tok3[0])
     start = tok3[1]
     end = tok3[2]
     # Fixed: messages previously said "token 2" for the third token.
     self.assertEqual(data, "AAAAA",
                     msg="wrong data for token 3, expected: 'AAAAA', found: '{0}' ".format(data))
     self.assertEqual(start, 35, msg="wrong start frame for token 3, expected: 35, found: {0} ".format(start))
     self.assertEqual(end, 39, msg="wrong end frame for token 3, expected: 39, found: {0} ".format(end))
Example #18
0
    def test_DROP_TAILING_SILENCE(self):
        """DROP_TAILING_SILENCE must trim trailing silent frames from the
        delivered token."""
        tokenizer = StreamTokenizer(self.A_validator,
                                    min_length=5,
                                    max_length=10,
                                    max_continuous_silence=2,
                                    init_min=3,
                                    init_max_silence=3,
                                    mode=StreamTokenizer.DROP_TAILING_SILENCE)

        data_source = StringDataSource("aaAAAAAaaaaa")
        #                                 ^   ^
        #                                 2   6

        tokens = tokenizer.tokenize(data_source)

        count = len(tokens)
        self.assertEqual(
            count,
            1,
            msg="wrong number of tokens, expected: 1, found: {0} ".format(
                count))

        tok = tokens[0]
        data = ''.join(tok[0])
        start, end = tok[1], tok[2]

        self.assertEqual(
            data,
            "AAAAA",
            msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".
            format(data))
        self.assertEqual(
            start,
            2,
            msg="wrong start frame for token 1, expected: 2, found: {0} ".
            format(start))
        self.assertEqual(
            end,
            6,
            msg="wrong end frame for token 1, expected: 6, found: {0} ".format(
                end))
    def __init__(self, _useGui):
        """Set up the audio recorder/tokenizer and the recordings folder.

        _useGui: when truthy, also builds the Tk-based GUI display.
        """
        # audio parameters (lengths in analysis frames)
        max_length = 1000000
        max_interval = 12000
        max_continuous_silence = 500
        min_length = 150

        self.sample_rate = 48000
        self.asource = ADSFactory.ads(record=True,
                                      max_time=max_length,
                                      sampling_rate=self.sample_rate)

        self.sample_width = self.asource.get_sample_width()
        self.channels = self.asource.get_channels()
        # START VALIDATOR
        # NOTE(review): energy_threshold is expected to be a module-level
        # global -- confirm it is defined where this class is used.
        self.validator = AudioEnergyValidator(
            sample_width=self.sample_width, energy_threshold=energy_threshold)
        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=min_length,
            max_length=max_length,
            max_continuous_silence=max_continuous_silence)

        # One timestamped folder per run, world-writable.
        self.audio_folder = 'recordings/' + '{:%Y-%m-%d_%H-%M-%S}'.format(
            datetime.datetime.now()) + '/'
        if not os.path.exists(os.path.dirname(self.audio_folder)):
            try:
                os.makedirs(os.path.dirname(self.audio_folder))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    # Fixed: was `raiseRec` (a NameError); re-raise the OSError.
                    raise
        os.chmod('recordings', 0o777)
        os.chmod(self.audio_folder, 0o777)
        self.MODE = 'ECHO'

        self.useGui = _useGui

        if self.useGui:
            root = Tk()
            self.display = GUI(root, True)
            self.display.display_image()
Example #20
0
    def configure(self, rf):
        """Open YARP ports, set up hotword detection and the sentence
        tokenizer, load ASR credentials, and start optional helper
        threads. Always returns True."""
        # Setting up rpc port
        self.portsList["rpc"] = yarp.Port()
        self.portsList["rpc"].open("/sentence_tokenizer/rpc:i")
        self.attach(self.portsList["rpc"])

        self.portsList["audio_out"] = yarp.BufferedPortBottle()
        self.portsList["audio_out"].open("/sentence_tokenizer/audio:o")

        # Setting up hotword detection
        self.hotword_detector = snowboydecoder.HotwordDetector(self.hotword_model, sensitivity=self.hotword_sensitivity)

        # Setting up audio tokenizer to split sentences
        self.audio_source = ADSFactory.ads(record=True, max_time=self.tok_record_duration, block_dur=self.tok_window)
        self.tok_validator = AudioEnergyValidator(sample_width=self.audio_source.get_sample_width(),
                                                  energy_threshold=self.tok_energy_threshold)
        self.tokenizer_mode = StreamTokenizer.DROP_TRAILING_SILENCE
        self.tokenizer = StreamTokenizer(validator=self.tok_validator,
                                         min_length=self.tok_min_len,
                                         max_length=self.tok_max_len,
                                         max_continuous_silence=self.tok_max_silence_duration,
                                         mode=self.tokenizer_mode)

        if self.echo_enabled:
            self.echo_thread = threading.Thread(target=self.replayAudio)
            self.echo_thread.start()

        if self.hotword_enabled:
            print("Waiting for hotword to start interaction")
            # self.hotword_detector.start(detected_callback=self.detected_callback,
            #                             interrupt_check=self.interrupt_callback,
            #                             sleep_time=self.hotword_loop_time)
            print("Hotword detected. Starting tokenizer thread")
        else:
            # Fixed: was a Python 2 print statement -- a SyntaxError on
            # Python 3, and inconsistent with the print() calls above.
            print("Starting tokenizer thread")

        self.asr = sr.Recognizer()

        with open('google_credentials.json', 'r') as credentials:
            self.google_credentials = credentials.read()
        return True
Example #21
0
    def transcribe_audio(self, stereo_path, channels_to_process):
        """Tokenize one or both mono legs of *stereo_path* and batch the
        detected segments.

        channels_to_process: 'A', 'B' or 'AB'.
        Raises Exception when the file does not exist.
        """
        if not os.path.isfile(stereo_path):
            raise Exception("Audio file does not exist.")

        data = self.split_to_mono(stereo_path)

        a_leg = data['a_leg']
        b_leg = data['b_leg']

        # Drop the dict references so the large audio buffers are not
        # kept alive twice.
        data['a_leg'] = None
        data['b_leg'] = None

        validator = AudioEnergyValidator(sample_width=data['frame_width'], energy_threshold=45)
        trimmer = StreamTokenizer(validator,
                                  min_length=self.min_segment_length,
                                  max_length=self.max_segment_length,
                                  max_continuous_silence=self.max_continuous_silence,
                                  mode=StreamTokenizer.DROP_TAILING_SILENCE)

        segments = []
        # NOTE(review): block_size uses true division -- on Python 3 this
        # yields a float; confirm the ADS accepts non-integer block sizes.
        if channels_to_process in ['A', 'AB']:
            a_source = ADSFactory.ads(audio_source=a_leg, record=True, block_size=data['frame_rate'] / self.divisor)
            a_source.open()
            trimmer.tokenize(a_source, callback=lambda data, start, end: segments.append(("A", data, start, end)))

        if channels_to_process in ['B', 'AB']:
            b_source = ADSFactory.ads(audio_source=b_leg, record=True, block_size=data['frame_rate'] / self.divisor)
            b_source.open()
            trimmer.tokenize(b_source, callback=lambda data, start, end: segments.append(("B", data, start, end)))

        # Order all collected segments by end frame (x[3]) before batching.
        segments = sorted(segments, key=lambda x: x[3])
        self.batch(segments, data['duration'], data['frame_rate'], data['frame_width'], data['nchannels'])
Example #22
0
def _make_auditok_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    """Build a speech-activity detector backed by auditok.

    Parameters
    ----------
    sample_rate : int
        Analysis windows per second (detector resolution).
    frame_rate : int
        Audio frames per second of the incoming byte stream.
    non_speech_label : float
        Label value assigned to non-speech regions (speech regions get 1.0).

    Returns
    -------
    Callable[[bytes], np.ndarray]
        Function mapping a raw 16-bit mono audio buffer to a per-window
        activity array clipped to [0.0, 1.0].

    Raises
    ------
    ImportError
        If auditok is not installed.
    """
    try:
        from auditok import (
            BufferAudioSource,
            ADSFactory,
            AudioEnergyValidator,
            StreamTokenizer,
        )
    except ImportError as e:
        logger.error(
            """Error: auditok not installed!
        Consider installing it with `pip install auditok`. Note that auditok
        is GPLv3 licensed, which means that successfully importing it at
        runtime creates a derivative work that is GPLv3 licensed. For personal
        use this is fine, but note that any commercial use that relies on
        auditok must be open source as per the GPLv3!*
        *Not legal advice. Consult with a lawyer.
        """
        )
        raise e
    bytes_per_frame = 2  # 16-bit mono samples
    frames_per_window = frame_rate // sample_rate
    validator = AudioEnergyValidator(sample_width=bytes_per_frame, energy_threshold=50)
    # StreamTokenizer lengths are frame counts; cast the fractional products
    # to int (max_length already was — min/silence were passed as floats).
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=int(0.2 * sample_rate),
        max_length=int(5 * sample_rate),
        max_continuous_silence=int(0.25 * sample_rate),
    )

    def _detect(asegment: bytes) -> np.ndarray:
        asource = BufferAudioSource(
            data_buffer=asegment,
            sampling_rate=frame_rate,
            sample_width=bytes_per_frame,
            channels=1,
        )
        ads = ADSFactory.ads(audio_source=asource, block_dur=1.0 / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)
        # Number of analysis windows covering the segment (ceiling division).
        length = (
            len(asegment) // bytes_per_frame + frames_per_window - 1
        ) // frames_per_window
        # Mark token boundaries, then integrate: +1 at each token start,
        # back to non_speech_label after each token end.
        media_bstring = np.zeros(length + 1)
        for token in tokens:
            media_bstring[token[1]] = 1.0
            media_bstring[token[2] + 1] = non_speech_label - 1.0
        return np.clip(np.cumsum(media_bstring)[:-1], 0.0, 1.0)

    return _detect
 def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self):
     """Combined STRICT_MIN_LENGTH | DROP_TAILING_SILENCE modes.

     With max_length=8 the first token is capped at 8 frames (frames 2..9);
     the strict-min-length rule then drops the short remainder, and trailing
     silence is removed, leaving exactly one token of pure 'A's.
     """
     tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
                                 max_continuous_silence=3, init_min = 3,
                                 init_max_silence = 3, mode=StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TAILING_SILENCE)
     
     data_source = StringDataSource("aaAAAAAAAAAAAAaa")
     #                                 ^      ^
     #                                 2      8
     # NOTE: the caret sketch above is approximate; the assertions below
     # expect the token to span frames 2..9 inclusive (8 frames).
     
     tokens = tokenizer.tokenize(data_source)
             
     self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
     tok1 = tokens[0]
     
     
     # tok1 layout: (data frames, start frame, end frame) — both inclusive.
     data = ''.join(tok1[0])
     start = tok1[1]
     end = tok1[2]
     self.assertEqual(data, "AAAAAAAA",
                     msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(data))
     self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
     self.assertEqual(end, 9, msg="wrong end frame for token 1, expected: 9, found: {0} ".format(end))
def _get_asr_result_whole(folder, prefix):
    """Tokenize ./temp/<prefix>.wav into speech segments, decode each segment
    with pocketsphinx, and dump per-word features to <folder>/<prefix>.csv.

    Parameters
    ----------
    folder : str
        Output directory for the CSV file.
    prefix : str
        Base name of the input WAV file (without extension).
    """
    asource = ADSFactory.ads(filename='./temp/{}.wav'.format(prefix), block_size=160)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)
    tokenizer = StreamTokenizer(validator=validator, min_length=300, max_length=1000, max_continuous_silence=50)
    asource.open()
    from pocketsphinx_decoder import decoder

    tokens = tokenizer.tokenize(asource)

    # Accumulates one column per decoded-word attribute.
    d = defaultdict(list)

    past = 0  # running frame offset of the current token within the stream
    for content, start, end in tokens:
        save_audio_data(data=b''.join(content), filename='tmp.wav', filetype='wav', sr=asource.get_sampling_rate(), sw=asource.get_sample_width(), ch=asource.get_channels())
        decoder.start_utt()
        # Fixed resource leak: close the temp file deterministically instead
        # of relying on garbage collection of the open handle.
        with open('tmp.wav', 'rb') as wav_file:
            decoder.process_raw(wav_file.read(), False, False)
        decoder.end_utt()
        seg = list(decoder.seg())
        print(' '.join([s.word for s in seg]))

        def add_feature(name, add=None):
            # Collect attribute `name` from every decoded segment, optionally
            # offsetting numeric values by `add` (used to shift frame numbers
            # by the running total `past`).
            if add is None:
                d[name].extend(list(map(attrgetter(name), seg)))
            else:
                d[name].extend([attrgetter(name)(x) + add for x in seg])

        add_feature('start_frame', past)
        add_feature('end_frame', past)
        add_feature('word')
        add_feature('ascore')
        add_feature('lscore')
        add_feature('lback')
        add_feature('prob')
        past += len(content)

    # Hoisted out of the loop: the CSV was previously rewritten on every
    # iteration; writing once at the end produces the identical final file.
    if d:
        df = pd.DataFrame(d)
        df = df[['start_frame', 'end_frame', 'ascore', 'lscore', 'lback', 'prob', 'word']]
        df.to_csv(path.join(folder, '{}.csv'.format(prefix)), index=False)
class AudioDetection:
    """Listen on the microphone, tokenize acoustic activity, and respond to
    each detected segment by echoing it back or playing a random file.

    Every detected segment is saved as a volume-normalized, faded WAV file
    under a timestamped folder inside ``recordings/``. Keyboard keys switch
    modes at runtime: 'e' = ECHO, 'r' = RANDOM, 'q' = quit.
    """

    def __init__(self, _useGui):
        # Tokenizer parameters, expressed in analysis frames.
        max_length = 1000000
        max_continuous_silence = 500
        min_length = 150

        self.sample_rate = 48000
        self.asource = ADSFactory.ads(record=True,
                                      max_time=max_length,
                                      sampling_rate=self.sample_rate)

        self.sample_width = self.asource.get_sample_width()
        self.channels = self.asource.get_channels()
        # START VALIDATOR
        # NOTE(review): energy_threshold is expected to be a module-level
        # global defined elsewhere in this file — confirm.
        self.validator = AudioEnergyValidator(
            sample_width=self.sample_width, energy_threshold=energy_threshold)
        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=min_length,
            max_length=max_length,
            max_continuous_silence=max_continuous_silence)

        # One output folder per run, named by start timestamp.
        self.audio_folder = 'recordings/' + '{:%Y-%m-%d_%H-%M-%S}'.format(
            datetime.datetime.now()) + '/'
        if not os.path.exists(os.path.dirname(self.audio_folder)):
            try:
                os.makedirs(os.path.dirname(self.audio_folder))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    # Fixed: the original said `raiseRec` (a NameError);
                    # re-raise the unexpected OSError instead.
                    raise
        os.chmod('recordings', 0o777)
        os.chmod(self.audio_folder, 0o777)
        self.MODE = 'ECHO'

        self.useGui = _useGui

        if self.useGui:
            root = Tk()
            self.display = GUI(root, True)
            self.display.display_image()

    def start(self):
        """Start the keyboard listener and begin blocking on the microphone."""
        self.listener = keyboard.Listener(on_press=self.on_press,
                                          on_release=self.on_release)
        self.listener.start()
        self.listen()

    def listen(self):
        """Open the microphone and run the tokenizer until the source ends."""
        self.asource.open()
        print("\n  ** Listening!!!")
        # onDetection is invoked for every detected activity segment.
        self.tokenizer.tokenize(self.asource, callback=self.onDetection)
        self.asource.close()

    def on_press(self, key):
        # Key presses are ignored; mode switching happens on release.
        return ''

    def on_release(self, key):
        """Switch playback mode ('e'/'r') or quit ('q') on key release."""
        if hasattr(key, 'char'):
            if key.char == 'e':
                self.MODE = 'ECHO'
                print('set mode to', self.MODE)
            if key.char == 'r':
                self.MODE = 'RANDOM'
                print('set mode to', self.MODE)
            if key.char == 'q':
                self.asource.close()
                sys.exit(0)

    def onDetection(self, data, start, end):
        """Tokenizer callback: save the segment, then play a response."""
        name = "{0}-{1}".format(start, end) + '.wav'
        print(name)
        filename = self.savefile(data, start, end)
        print('current mode', self.MODE)
        if self.MODE == 'RANDOM':
            randomfile = player.getRandomFile(play_folder)
            player.play(randomfile)
        if self.MODE == 'ECHO':
            player.play(filename)
        # Fixed: self.display only exists when the GUI was enabled; the
        # original called it unconditionally and crashed in headless mode.
        if self.useGui:
            self.display.display_image()
        print("finished playing")

    def savefile(self, data, start, end):
        """Write the raw frames to a WAV file, normalize its volume, add
        fade-in/out, and return the resulting file path."""
        name = "{0}-{1}".format(start, end) + '.wav'
        filename = self.audio_folder + name

        # save wav file
        waveFile = wave.open(filename, 'wb')
        waveFile.setnchannels(self.channels)
        waveFile.setsampwidth(self.sample_width)
        waveFile.setframerate(self.sample_rate)
        waveFile.writeframes(b''.join(data))
        waveFile.close()

        # normalize volume
        sound = AudioSegment.from_file(filename, "wav")
        normalized_sound = self.match_target_amplitude(sound, -15.0)
        with_fade = normalized_sound.fade_in(200).fade_out(200)
        with_fade.export(filename, format="wav")

        print('audio saved at', filename)
        return filename

    def match_target_amplitude(self, sound, target_dBFS):
        """Return `sound` gain-adjusted to the requested dBFS level."""
        change_in_dBFS = target_dBFS - sound.dBFS
        return sound.apply_gain(change_in_dBFS)
示例#26
0
     duration = float(sys.argv[2])

   # record = True so that we'll be able to rewind the source.
   # max_time = 10: read 10 seconds from the microphone
   asource = ADSFactory.ads(record=True, max_time = duration, sampling_rate = sample_rate)

   # params 
   # sample_rate = asource.get_sampling_rate()
   sample_width = asource.get_sample_width()
   channels = asource.get_channels()
   chunk = 1024

   print(sample_width, sample_rate)
   
   validator = AudioEnergyValidator(sample_width=sample_width, energy_threshold = energy_threshold)
   tokenizer = StreamTokenizer(validator=validator, min_length=70, max_length=20000, max_continuous_silence=100)

   p = pyaudio.PyAudio()
   
   for i in range(p.get_device_count()):
    dev = p.get_device_info_by_index(i)
    print((i,dev['name'],dev['maxInputChannels']))

   def savefile(data, start, end):
      print('-----------------------')
      print("Acoustic activity at: {0}--{1}".format(start, end))  
      filename = "teste_{0}_{1}.wav".format(start, end)
      waveFile = wave.open(filename, 'wb')
      waveFile.setnchannels(channels)
      waveFile.setsampwidth(sample_width)
      waveFile.setframerate(channels)
示例#27
0
    def test_min_5_max_10_max_continuous_silence_1(self):
        """min_length=5, max_length=10, max_continuous_silence=1.

        One silent frame is tolerated inside a token, a second ends it,
        and tokens are capped at 10 frames.
        """
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=10,
            max_continuous_silence=1,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
        #                                  ^        ^^ ^ ^        ^
        #                                  3       12131517      26
        #                                         (12 13 15 17)

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            3,
            msg="wrong number of tokens, expected: 3, found: {0} ".format(
                len(tokens)),
        )
        tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]

        data = "".join(tok1[0])
        start = tok1[1]
        end = tok1[2]
        self.assertEqual(
            data,
            "AAAAAaAAAA",
            msg=("wrong data for token 1, expected: 'AAAAAaAAAA', "
                 "found: '{0}' ").format(data),
        )
        self.assertEqual(
            start,
            3,
            msg=("wrong start frame for token 1, expected: 3, found: {0} "
                 ).format(start),
        )
        # Fixed message: the asserted end frame is 12, not 10.
        self.assertEqual(
            end,
            12,
            msg=("wrong end frame for token 1, expected: 12, found: {0} "
                 ).format(end),
        )

        # Fixed messages below: they said "token 1" with stale expected
        # values copied from another test.
        data = "".join(tok2[0])
        start = tok2[1]
        end = tok2[2]
        self.assertEqual(
            data,
            "AAa",
            msg=("wrong data for token 2, expected: 'AAa', found: '{0}' "
                 ).format(data),
        )
        self.assertEqual(
            start,
            13,
            msg=("wrong start frame for token 2, expected: 13, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            15,
            msg=("wrong end frame for token 2, expected: 15, found: {0} "
                 ).format(end),
        )

        data = "".join(tok3[0])
        start = tok3[1]
        end = tok3[2]
        self.assertEqual(
            data,
            "AAAAAAAAAa",
            msg=("wrong data for token 3, expected: 'AAAAAAAAAa', "
                 "found: '{0}' ").format(data),
        )
        self.assertEqual(
            start,
            17,
            msg=("wrong start frame for token 3, expected: 17, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            26,
            msg=("wrong end frame for token 3, expected: 26, found: {0} "
                 ).format(end),
        )
示例#28
0
文件: echo.py 项目: ixtel/auditok
    energy_threshold = 45
    duration = 10  # seconds

    if len(sys.argv) > 1:
        energy_threshold = float(sys.argv[1])

    if len(sys.argv) > 2:
        duration = float(sys.argv[2])

    # record = True so that we'll be able to rewind the source.
    # max_time = 10: read 10 seconds from the microphone
    asource = ADSFactory.ads(record=True, max_time=duration)

    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energy_threshold)
    tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)

    player = player_for(asource)

    def echo(data, start, end):
        print("Acoustic activity at: {0}--{1}".format(start, end))
        player.play(b"".join(data))

    asource.open()

    print("\n  ** Make some noise (dur:{}, energy:{})...".format(duration, energy_threshold))

    tokenizer.tokenize(asource, callback=echo)

    asource.close()
    player.stop()
      if w is None:
         break
      original_signal.append(w)

   original_signal = b''.join(original_signal)


   # rewind source
   asource.rewind()

   # Create a validator with an energy threshold of 50
   validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)

   # Create a tokenizer with an unlimited token length and continuous silence within a token
   # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
   trimmer = StreamTokenizer(validator, min_length = 20, max_length=99999999,
                             max_continuous_silence=9999999, mode=StreamTokenizer.DROP_TRAILING_SILENCE, init_min=3, init_max_silence=1)


   tokens = trimmer.tokenize(asource)

   # Make sure we only have one token
   assert len(tokens) == 1, "Should have detected one single token"

   trimmed_signal = b''.join(tokens[0][0])

   player = player_for(asource)

   print("\n ** Playing original signal (with leading and trailing silence)...")
   player.play(original_signal)
   print("\n ** Playing trimmed signal...")
   player.play(trimmed_signal)
class StreamReader():
    """Capture (or load) audio, tokenize acoustic activity, and classify each
    segment with a pre-trained CNN keyword-spotting model.

    Recognized keywords map positionally to shell commands in `commands`.
    """

    # Closed set of keyword classes the CNN can predict; index == class id.
    categories = [
        'None',
        'akatambi',
        'app',
        'bluetooth',
        'damu',
        'fayilo',
        'folder',
        'galawo',
        'gulawo',
        'koma awo',
        'lekera awo',
        'menu',
        'task_manager',
        'vaako',
        'webake',
        'wezeeko',
        'wifi',
        'wumula',
        'zanya',
        'zikira',
    ]
    # Shell command associated with each category, by index.
    # NOTE(review): this list has 19 entries vs 20 categories — indexing
    # with the last class id would raise IndexError; confirm intended.
    commands = [
        '', '', '', '', '', '', '', 'nautilus', 'nautilus',
        'pkill,-KILL -u $USER', '', '', 'gnome-system-monitor,',
        'shutdown,-rf,now', 'shutdown,-f,now', '', '', '', '',
        'shutdown,-f,now'
    ]
    '''
	categories = ['None', 'damu', 'galawo', 'menu', 'task_manager', 'wuumula']
	commands = ['', 'Unset', 'pkill,-KILL -u $USER', 'nautilus,', 'gnome-system-monitor,', 'shutdown,-f']'''

    # Last keyword predicted by the CNN (class-level state).
    recognized_keyword = None
    # Parsed command for the recognized keyword (currently unused).
    command = None

    def __init__(self):
        """Set up the audio source, tokenizer, player and CNN model."""
        self.asource = ADSFactory.ads(record=True, max_time=4)
        self.validator = AudioEnergyValidator(sample_width=2,
                                              energy_threshold=50)
        self.tokenizer = StreamTokenizer(validator=self.validator,
                                         min_length=20,
                                         max_length=1000,
                                         max_continuous_silence=30)
        self.player = player_for(self.asource)

        self.model = self.load_cnn('../model/final_cnn_model.json',
                                   '../model/weights_final_cnn.h5')
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='sgd',
                           metrics=['accuracy'])

    #CALLBACK FOR NEXT PROCESSING
    def echo(self, data, start, end):
        """Tokenizer callback: normalize the detected segment, extract mel
        filterbanks, and run the CNN to predict the spoken keyword.

        Side effects: writes/removes tmp.wav and tmp.wav2, updates
        self.recognized_keyword, clears the keras session.
        """
        if (self.audio_file is not None):
            recording = self.asource.read()
        else:
            recording = b''.join(data)
            print("Acoustic Activity at: {0}--{1}".format(start, end))

        print(recording)
        #data = np.array(data)
        #serialized = np.frombuffer(data)

        #print(len(hex_data))
        #print(len(recording))
        # NOTE(review): normalize/pad_tokens appear to produce tmp.wav and
        # tmp.wav2 on disk — they are defined elsewhere; confirm contract.
        normalize(recording)
        pad_tokens('tmp.wav2')
        sample_rate, normalized_signal = wavfile.read('tmp.wav2')
        print(sample_rate)
        print(len(normalized_signal))
        banks = convert_to_mel(normalized_signal)
        banks = np.array(banks)
        # CNN input shape: (batch=1, 98 frames, 40 mel bands, 1 channel).
        Banks = banks.reshape(1, 98, 40, 1)
        #np.save('test.npy', Banks)

        # Argmax over class probabilities picks the predicted keyword.
        z = self.model.predict(Banks)
        p = z[0].tolist().index(max(z[0]))
        self.recognized_keyword = self.categories[p]
        print(self.recognized_keyword)
        '''
		frame_length, step_size = 16000, 64000
		no_of_shifts = int(64000 / step_size) - int(frame_length / step_size)
		print(no_of_shifts)
		#keyword = None
		prob = 0
		for i in range(0, no_of_shifts):
			l = int(i*320)
			banks = convert_to_mel(normalized_signal[l:l+frame_length])
			banks = np.array(banks)
			np.save('test.npy', banks)
			#banks = np.load('sd.npy')
			Banks = banks.reshape(1, 98, 40, 1)
			z = self.model.predict(Banks)
			print(z)
			p = z[0].tolist().index(max(z[0]))
			prob += p
			self.recognized_keyword = self.categories[p]
			#self.recognized_keyword = self.categories[np.argmax(z[0], -1)]
			#com = self.commands[p]
			print(self.recognized_keyword)
		final = int(np.ceil(prob/no_of_shifts))
		print(prob/no_of_shifts)
		print(final)
		self.recognized_keyword = self.categories[final]'''

        # Free keras resources and clean up the temp audio files.
        K.clear_session()
        os.remove('tmp.wav2')
        os.remove('tmp.wav')

        #self.command = re.split(',', com)
        #print(max(z[0]))
        #print(self.recognized_keyword)
        #subprocess.call([self.command[0], self.command[1]])
        #print(normalized_signal.get_array_of_samples())

        #print(normalized_signal.get_array_of_samples())
        #frame = np.array()

        #with open(datetime.today().isoformat() + 'XX.txt', 'w') as f:
        #	f.write(recording)
        #player.play(recording)

    def load_cnn(self, name, weights):
        """Load a keras model architecture from JSON and its weights from
        HDF5; return the assembled (uncompiled) model."""
        with open(name, 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # Load weights into new model
        loaded_model.load_weights(weights)
        print("loaded model from disk")
        return loaded_model

    def main(self, audio_file=None):
        """Entry point: live-tokenize the microphone when `audio_file` is
        None, otherwise run the offline feature-dump pipeline on the file."""
        self.audio_file = audio_file
        if (self.audio_file is None):
            print("RECORDING STARTED")
            self.asource.open()
            self.tokenizer.tokenize(self.asource, callback=self.echo)
        else:
            print("AUDIO PROCESSING STARTED")
            #DELETE TOKENS FOLDER
            shutil.rmtree('../../tokens', ignore_errors=True)
            shutil.rmtree('../../data', ignore_errors=True)
            #CREATE tokens/tmp FOLDER
            os.makedirs('../../tokens/temp')
            #CREATE tmp/test AND tmp/train
            os.makedirs('../../tokens/temp/test')
            os.makedirs('../../tokens/temp/train')
            #COPY AUDIO FILE TO tokens/tmp/test as test.wave
            shutil.copyfile(self.audio_file, '../../tokens/temp/test/tf.wav')
            shutil.copyfile(self.audio_file, '../../tokens/temp/train/tf2.wav')
            #CALL SUBPROCESS TO RUN ./dump.sh
            with cd('../../'):
                subprocess.call(['./dump.sh', ''])
                #OPEN FILE IN /data/test/*.npy
                files = [file for file in os.listdir('./data/temp/test/')]
                #PREDICT DATA
                data = np.load('./data/temp/test/' + files[0])
                banks = data.reshape(1, 98, 40, 1)
                z = self.model.predict(banks)
                p = z[0].tolist().index(max(z[0]))
                self.recognized_keyword = self.categories[p]
                print(self.recognized_keyword)
                K.clear_session()
示例#31
0
    def test_init_min_3_init_max_silence_2(self):
        """init_min=3, init_max_silence=2: a token starts only after 3 valid
        frames with at most 2 consecutive silent frames during init."""
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=2,
            mode=0,
        )

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA")
        #         ^          ^  ^           ^   ^   ^
        #         5          16 19          31  35  39
        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            3,
            msg="wrong number of tokens, expected: 3, found: {0} ".format(
                len(tokens)),
        )
        tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]

        data = "".join(tok1[0])
        start = tok1[1]
        end = tok1[2]
        # Fixed message: the asserted data is 'AaAaaAaAaaaa' (the message
        # previously showed a truncated expected value).
        self.assertEqual(
            data,
            "AaAaaAaAaaaa",
            msg=("wrong data for token 1, expected: 'AaAaaAaAaaaa', "
                 "found: '{0}' ").format(data),
        )
        self.assertEqual(
            start,
            5,
            msg=("wrong start frame for token 1, expected: 5, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            16,
            msg=("wrong end frame for token 1, expected: 16, found: {0} "
                 ).format(end),
        )

        data = "".join(tok2[0])
        start = tok2[1]
        end = tok2[2]
        self.assertEqual(
            data,
            "AAAAAAAAAaaaa",
            msg=("wrong data for token 2, expected: 'AAAAAAAAAaaaa', "
                 "found: '{0}' ").format(data),
        )
        self.assertEqual(
            start,
            19,
            msg=("wrong start frame for token 2, expected: 19, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            31,
            msg=("wrong end frame for token 2, expected: 31, found: {0} "
                 ).format(end),
        )

        data = "".join(tok3[0])
        start = tok3[1]
        end = tok3[2]
        self.assertEqual(
            data,
            "AAAAA",
            msg=("wrong data for token 3, expected: 'AAAAA', found: '{0}' "
                 ).format(data),
        )
        # Fixed messages: these two said "token 2".
        self.assertEqual(
            start,
            35,
            msg=("wrong start frame for token 3, expected: 35, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            39,
            msg=("wrong end frame for token 3, expected: 39, found: {0} "
                 ).format(end),
        )
示例#32
0
    def test_init_min_0_init_max_silence_0(self):
        """init_min=0, init_max_silence=0: a token starts at the very first
        valid frame, with no initial-silence tolerance."""
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=0,
            init_max_silence=0,
            mode=0,
        )

        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaaAAAAAAAA")
        #                               ^              ^   ^      ^
        #                               1              16  20     27
        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)),
        )
        tok1, tok2 = tokens[0], tokens[1]

        # tok1[0]: data
        # tok1[1]: start frame (included)
        # tok1[2]: end frame (included)

        data = "".join(tok1[0])
        start = tok1[1]
        end = tok1[2]
        self.assertEqual(
            data,
            "AaaaAaAaaAaAaaaa",
            msg=("wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', "
                 "found: {0} ").format(data),
        )
        self.assertEqual(
            start,
            1,
            msg=("wrong start frame for token 1, expected: 1, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            16,
            msg=("wrong end frame for token 1, expected: 16, found: {0} "
                 ).format(end),
        )

        data = "".join(tok2[0])
        start = tok2[1]
        end = tok2[2]
        # Fixed message: this assertion checks token 2, not token 1.
        self.assertEqual(
            data,
            "AAAAAAAA",
            msg=("wrong data for token 2, expected: 'AAAAAAAA', found: {0} "
                 ).format(data),
        )
        self.assertEqual(
            start,
            20,
            msg=("wrong start frame for token 2, expected: 20, found: {0} "
                 ).format(start),
        )
        self.assertEqual(
            end,
            27,
            msg=("wrong end frame for token 2, expected: 27, found: {0} "
                 ).format(end),
        )
示例#33
0
    def extractEvents(path, patientID):
        """Split a WAV recording into acoustic-activity events and save each
        event as its own WAV under /home/pi/recordings/<patientID>/<name>/.

        Parameters
        ----------
        path : str
            Path to the source WAV file.
        patientID : str
            Identifier used to build the destination folder.

        Returns
        -------
        str
            The destination folder path.
        """
        yname = os.path.basename(path)
        yname = yname[:len(yname) - 4]  # strip the ".wav" extension

        dest_path = '/home/pi/recordings/' + patientID + '/' + yname + '/'
        if not os.path.exists(dest_path):
            os.makedirs(dest_path)

        fsoriginal, y = wavfile.read(path)  # read audio file

        # Keep only the first channel of multi-channel audio. The original
        # wrapped a 2-tuple unpack of np.shape(y) in a bare except (which
        # silently swallowed every error for mono input); checking the
        # dimensionality directly is explicit and equivalent.
        if y.ndim > 1 and y.shape[1] > 1:
            y = np.delete(y, 1, axis=1)
            # print("audio file shape:  ", numpy.shape(y))

        wavfile.write('/home/pi/coughanalysis_ann/sample.wav',
                      data=y,
                      rate=44100)

        asource = ADSFactory.ads(
            filename='/home/pi/coughanalysis_ann/sample.wav', record=True)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(), energy_threshold=65)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
        # max_length=4000 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
        # max_continuous_silence=30 : maximum length of a tolerated  silence within a valid audio activity is 30 * 30 == 300 ms

        # For a sampling rate of 16KHz (16000 samples per second), we have 160 samples for 10 ms.

        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=10,
                                    max_length=1000,
                                    max_continuous_silence=40)

        asource.open()
        tokens = tokenizer.tokenize(asource)

        # Play detected regions back

        # player = player_for(asource)

        # Rewind and read the whole signal
        asource.rewind()
        original_signal = []

        while True:
            w = asource.read()
            if w is None:
                break
            original_signal.append(w)

        # Fixed: audio frames are bytes; joining with a str separator raises
        # TypeError on Python 3.
        original_signal = b''.join(original_signal)

        # print("Playing the original file...")
        # player.play(original_signal)

        # print("playing detected regions...")
        count = 0
        for t in tokens:
            # print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
            # Fixed: bytes join (wave.writeframes requires bytes).
            data = b''.join(t[0])
            # player.play(data)

            fp = wave.open(dest_path + yname + str(count) + '.wav', "w")
            fp.setnchannels(asource.get_channels())
            fp.setsampwidth(asource.get_sample_width())
            fp.setframerate(asource.get_sampling_rate())
            fp.writeframes(data)
            fp.close()
            count += 1

        return dest_path
示例#34
0
    #check os system and set sample rate 48000 for Linux (Raspberry Pi)
    _os = platform.system()
    if (_os == 'Darwin') or (_os == 'Windows'):  # macOs
        sample_rate = asource.get_sampling_rate()

    # get sample width and channels from ads factory
    sample_width = asource.get_sample_width()
    channels = asource.get_channels()

    # START VALIDATOR
    validator = AudioEnergyValidator(sample_width=sample_width,
                                     energy_threshold=energy_threshold)
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=min_length,
        max_length=max_length,
        max_continuous_silence=max_continuous_silence)  #

    # LOAD PYAUDIO
    p = pyaudio.PyAudio()

    # start classe memoria
    _memoria = memoria.Memoria()

    # gui vars
    if GUI:
        root = Tk()
        display = GUI(root)

    if TRANSCRIPTION:
示例#35
0
class sentence_tokenizer(yarp.RFModule):
    """YARP RF module that segments live audio into sentence-sized tokens.

    Python 2 module (uses print statements).  Audio from an ADSFactory
    source is split by an auditok StreamTokenizer; each detected token is
    either transcribed with Google Cloud Speech (``use_google``) or sent
    raw on the ``/sentence_tokenizer/audio:o`` port for external
    classification.  A snowboy hotword detector and an optional local
    echo-playback thread are also wired in.
    """

    def __init__(self):
        """Initialise default state; ports and detectors are created in configure()."""
        yarp.RFModule.__init__(self)
        self.interrupted = False
        self.portsList = dict()  # port name -> yarp port object
        self.hotword_detector = None
        self.hotword_model = None
        self.token_out_port = None
        self.audio_source = None
        self.tok_validator = None
        self.tokenizer = None
        self.player = None
        self.pause_tokenizer = False  # when True, detected tokens are dropped

        self.echo_enabled = False
        self.trigger_echo = False
        self.echo_thread = None
        self.hotword_enabled = True

        # Hotword settings
        self.hotword_sensitivity = 0.5
        self.hotword_loop_time = 0.03
        # NOTE: raises KeyError at construction time if HOTWORD_MODEL is
        # not set in the environment.
        self.hotword_model = os.environ['HOTWORD_MODEL']

        # Tokenizer Settings
        # self.tok_record_duration = None means indefinite
        self.tok_record_duration = None
        self.tok_energy_threshold = 40 #60
        self.tok_window = 0.01 # 0.01
        # The lengths below are expressed in analysis windows (100 per
        # second here): min token 0.5 s, max token 5 s, max tolerated
        # in-token silence 0.7 s.
        self.tok_window_rate = 1. / self.tok_window
        self.tok_min_len = 0.5 * self.tok_window_rate
        self.tok_max_len = int(5 * self.tok_window_rate)
        self.tok_max_silence_duration = 0.7 * self.tok_window_rate
        self.tokenizer_mode = None
        self.bdata = None  # raw bytes of the most recent detected token
        self.google_credentials = None

        # Google ASR
        self.use_google = True
        self.asr = None
        self.time_total = 0  # cumulative recognition time in seconds
        self.num_recs = 0    # number of completed recognitions
        # Phrase hints passed to Google to bias recognition results.
        self.phrases = ["Hello i cub",
                        "Goodbye i cub",
                        "i cub",
                        "Tony",
                        "Daniel"]

    def configure(self, rf):
        """Open YARP ports and build the hotword detector and tokenizer.

        Reads Google credentials from ``google_credentials.json`` in the
        working directory.  Returns True so the RFModule loop starts.
        """
        # Setting up rpc port
        self.portsList["rpc"] = yarp.Port()
        self.portsList["rpc"].open("/sentence_tokenizer/rpc:i")
        self.attach(self.portsList["rpc"])

        self.portsList["audio_out"] = yarp.BufferedPortBottle()
        self.portsList["audio_out"].open("/sentence_tokenizer/audio:o")

        # Setting up hotword detection
        self.hotword_detector = snowboydecoder.HotwordDetector(self.hotword_model, sensitivity=self.hotword_sensitivity)

        # Setting up audio tokenizer to split sentences
        self.audio_source = ADSFactory.ads(record=True, max_time=self.tok_record_duration, block_dur=self.tok_window)
        self.tok_validator = AudioEnergyValidator(sample_width=self.audio_source.get_sample_width(),
                                                  energy_threshold=self.tok_energy_threshold)
        self.tokenizer_mode = StreamTokenizer.DROP_TRAILING_SILENCE
        self.tokenizer = StreamTokenizer(validator=self.tok_validator,
                                         min_length=self.tok_min_len,
                                         max_length=self.tok_max_len,
                                         max_continuous_silence=self.tok_max_silence_duration,
                                         mode=self.tokenizer_mode)

        if self.echo_enabled:
            self.echo_thread = threading.Thread(target=self.replayAudio)
            self.echo_thread.start()

        if self.hotword_enabled:
            print("Waiting for hotword to start interaction")
            # NOTE(review): the actual hotword wait below is commented out,
            # so even with hotword_enabled the module currently proceeds
            # without blocking on the hotword.
            # self.hotword_detector.start(detected_callback=self.detected_callback,
            #                             interrupt_check=self.interrupt_callback,
            #                             sleep_time=self.hotword_loop_time)
            print("Hotword detected. Starting tokenizer thread")
        else:
            print "Starting tokenizer thread"

        self.asr = sr.Recognizer()

        with open('google_credentials.json', 'r') as credentials:
            self.google_credentials = credentials.read()
        return True

    def detected_callback(self):
        """Snowboy callback: record that the hotword was heard."""
        print("Hotword 'Hello iCub' detected")
        self.interrupted = True

    def tok_callback(self, data, start, end, starting=False):
        """Callback invoked by the stream tokenizer.

        When ``data`` is None the call is a speaking start/stop marker
        (distinguished by ``starting``) and a "speaking" bottle is sent.
        Otherwise ``data`` holds the frames of a detected token, which is
        transcribed with Google ASR or forwarded raw, unless
        ``pause_tokenizer`` is set.
        """
        if data is None:

            audio_bottle = self.portsList["audio_out"].prepare()
            audio_bottle.clear()
            audio_bottle.addString("speaking")
            if starting:
                print "Speaking start"
                audio_bottle.addString("start")
            else:
                print "Speaking stop"
                audio_bottle.addString("stop")
            self.portsList["audio_out"].write()
        else:
            print("Acoustic activity at: {0}--{1}".format(start, end))
            # print "Chunk segmented", time.time()
            # print "Pause value is: ", self.pause_tokenizer
            if not self.pause_tokenizer:
                # Keep the raw bytes so the echo thread can replay them.
                self.bdata = b''.join(data)

                if self.use_google:
                    audio = sr.AudioData(self.bdata, self.audio_source.get_sampling_rate(),
                                         self.audio_source.get_sample_width())
                    t3 = time.time()
                    try:

                        sentence = self.asr.recognize_google_cloud(audio_data=audio,
                                                                   credentials_json=self.google_credentials,
                                                                   language="en-UK",
                                                                   preferred_phrases=self.phrases)

                        # Track mean recognition latency for diagnostics.
                        t4 = time.time()
                        dur = t4 - t3
                        self.time_total += dur
                        self.num_recs += 1
                        print sentence, " | Time taken=", dur, " | Mean Time=", self.time_total/self.num_recs
                        audio_bottle = self.portsList["audio_out"].prepare()
                        audio_bottle.clear()
                        audio_bottle.addString("spoken")
                        audio_bottle.addString(str(sentence))
                        self.portsList["audio_out"].write()
                    except sr.UnknownValueError:
                        print("Google Speech Recognition could not understand audio")
                    except sr.RequestError as e:
                        print("Could not request results from Google Speech Recognition service; {0}".format(e))
                else:
                    # Forward the raw token for external classification.
                    audio_bottle = self.portsList["audio_out"].prepare()
                    audio_bottle.clear()
                    audio_bottle.addString("classify")
                    audio_bottle.addString(self.bdata)
                    audio_bottle.addInt(self.audio_source.get_sampling_rate())
                    self.portsList["audio_out"].write()

                if self.echo_enabled:
                    self.trigger_echo = True

    def tokenizerThread(self):
        """Open the audio source and run the tokenizer (blocking)."""
        self.audio_source.open()
        self.tokenizer.tokenize(self.audio_source, callback=self.tok_callback)

    def replayAudio(self):
        """Echo-thread body: replay the last token whenever triggered.

        Polls ``trigger_echo`` every 2 seconds.
        """
        self.player = player_for(self.audio_source)
        while True:
            if self.trigger_echo:
                self.player.play(self.bdata)
                self.trigger_echo = False
            time.sleep(2)

    def close(self):
        """Shut everything down: detector, audio source, player, ports."""
        print('Exiting ...')
        time.sleep(2)
        self.hotword_detector.terminate()
        self.audio_source.close()

        if self.echo_enabled:
            self.player.stop()

        for j in self.portsList.keys():
            self.close_port(self.portsList[j])

        return True

    @staticmethod
    def close_port(j):
        """Interrupt, then close a single YARP port (with a settle delay)."""
        j.interrupt()
        time.sleep(1)
        j.close()

    def respond(self, command, reply):
        """RPC handler: heartbeat / pause / resume / EXIT; nack otherwise."""
        reply.clear()
        action = command.get(0).asString()

        if action == "heartbeat":
            reply.addString('ack')
        elif action == "pause":
            self.pause_tokenizer = True
            print "pausing tokenizer sending"
            reply.addString('ack')
        elif action == "resume":
            self.pause_tokenizer = False
            print "resuming tokenizer sending"
            reply.addString('ack')
        # -------------------------------------------------
        elif action == "EXIT":
            reply.addString('ack')
            self.close()
        # -------------------------------------------------
        else:
            reply.addString("nack")
            reply.addString("Command not recognized")

        return True

    def interruptModule(self):
        """YARP hook: called on interruption; delegates to close()."""
        print "Interrupting"
        self.close()
        return True

    def getPeriod(self):
        """Module update period in seconds."""
        return 0.1

    def updateModule(self):
        """Periodic YARP hook.

        Despite the name, tokenizerThread() runs synchronously here and
        blocks until tokenize() returns; it is then restarted on the next
        update cycle.
        """
        self.tokenizerThread()
        print "starting again"
        time.sleep(0.05)
        return True
示例#36
0
def find_voice_segments(audio_file, music_time_list):
    """Find voice (acoustic-activity) segments in an audio file.

    A first pass over the file computes per-window log-energy statistics
    and derives an adaptive energy threshold; a second pass tokenizes the
    file with that threshold.  Detected segments overlapping entries of
    *music_time_list* are split around them.

    Parameters:
        audio_file: path of the audio file to analyse.
        music_time_list: list of [begin_ms, end_ms] music intervals;
            matched intervals are removed (the caller's list is mutated,
            as in the original implementation).

    Returns:
        List of [begin_ms, end_ms] voice segments (empty if the file
        yields no frames).
    """
    segments = []
    formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}

    # ---- First pass: per-window energy of the whole file ----
    test_source = ADSFactory.ads(filename=audio_file, record=False)
    test_source.open()
    energies = []
    while True:
        frame = test_source.read()
        if frame is None:
            break
        signal = numpy.array(
            numpy.frombuffer(frame,
                             dtype=formats[test_source.get_sample_width()]),
            dtype=numpy.float64)
        # Mean power of this analysis window.
        energies.append(float(numpy.dot(signal, signal)) / len(signal))
    test_source.close()

    if not energies:
        # Empty or unreadable file: nothing to segment.
        return segments

    # Bug fix: the original seeded the energy array with numpy.empty([]),
    # a 0-d array holding one *uninitialized* value that was folded into
    # the mean and skewed the threshold.  Collecting into a list avoids it.
    energies = numpy.asarray(energies, dtype=numpy.float64)
    log_max = 10. * numpy.log10(energies.max())
    log_mean = 10. * numpy.log10(energies.mean())
    # Threshold 40% of the way (in dB) from the mean towards the peak.
    threshold = log_mean + 0.4 * (log_max - log_mean)

    # ---- Second pass: tokenize with the adaptive threshold ----
    # (The unused player_for() call was dropped: it opened an audio
    # device that was never used or released.)
    asource = ADSFactory.ads(filename=audio_file, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=threshold)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=300,
                                max_length=99999999,
                                max_continuous_silence=300)

    asource.open()
    tokens = tokenizer.tokenize(asource)
    for t in tokens:
        # Frame indices -> milliseconds (10 ms default analysis window).
        segment_begin = t[1] * 10
        segment_end = t[2] * 10

        if len(music_time_list) > 0:
            # Bug fix: iterate over a snapshot; removing from the list
            # being iterated silently skipped the following entry.
            for item in list(music_time_list):
                # if segment end includes music begin
                if segment_end > item[0]:
                    # include the part of the segment before the music
                    segments.append([segment_begin, item[0]])
                    # resume the segment after the music interval
                    segment_begin = item[1]
                    # remove the music interval so it is not reused
                    music_time_list.remove(item)

        segments.append([segment_begin, segment_end])

    asource.close()
    return segments
from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
import sys

try:

   # We set the `record` argument to True so that we can rewind the source
   asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)

   validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)

   # Defalut analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
   # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
   # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
   # max_continuous_silence=30 : maximum length of a tolerated  silence within a valid audio activity is 30 * 30 == 300 ms 
   tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)

   asource.open()
   tokens = tokenizer.tokenize(asource)

   # Play detected regions back
   player = player_for(asource)

   # Rewind and read the whole signal
   asource.rewind()
   original_signal = []

   while True:
      w = asource.read()
      if w is None:
         break
示例#38
0
    def test_init_min_3_init_max_silence_0(self):
        """With init_min=3 and init_max_silence=0 a token may only start
        on 3 consecutive valid frames, with no initial silence tolerated.
        Expects two tokens: frames 18-30 and 33-37.
        """

        tokenizer = StreamTokenizer(self.A_validator,
                                    min_length=5,
                                    max_length=20,
                                    max_continuous_silence=4,
                                    init_min=3,
                                    init_max_silence=0,
                                    mode=0)

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA")
        #                     ^           ^     ^   ^
        #                     18          30    33  37

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)))
        tok1, tok2 = tokens[0], tokens[1]

        # First expected token: frames 18..30 ("AAAAAAAAAaaaa").
        data = ''.join(tok1[0])
        start = tok1[1]
        end = tok1[2]
        self.assertEqual(
            data,
            "AAAAAAAAAaaaa",
            msg="wrong data for token 1, expected: 'AAAAAAAAAaaaa', "
                "found: '{0}' ".format(data))
        self.assertEqual(
            start,
            18,
            msg="wrong start frame for token 1, expected: 18, "
                "found: {0} ".format(start))
        self.assertEqual(
            end,
            30,
            msg="wrong end frame for token 1, expected: 30, "
                "found: {0} ".format(end))

        # Second expected token: frames 33..37 ("AAAAA").
        data = ''.join(tok2[0])
        start = tok2[1]
        end = tok2[2]
        # Bug fix: this failure message previously said "token 1"
        # (copy-paste error).
        self.assertEqual(
            data,
            "AAAAA",
            msg="wrong data for token 2, expected: 'AAAAA', "
                "found: '{0}' ".format(data))
        self.assertEqual(
            start,
            33,
            msg="wrong start frame for token 2, expected: 33, "
                "found: {0} ".format(start))
        self.assertEqual(
            end,
            37,
            msg="wrong end frame for token 2, expected: 37, "
                "found: {0} ".format(end))
示例#39
0
    def test_min_length_10_init_max_length_20(self):
        """Tokenize with min_length=10 / max_length=20 and verify the two
        expected tokens (data, start frame, end frame).
        """

        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=10,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA")
        #     ^              ^             ^            ^
        #     1              16            30           45

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)),
        )

        # (expected data, expected start frame, expected end frame)
        expectations = [
            ("AaaaAaAaaAaAaaaa", 1, 16),
            ("AAAAAaaAAaaAAA", 30, 43),
        ]
        for num, (token, (exp_data, exp_start, exp_end)) in enumerate(
                zip(tokens, expectations), start=1):
            found_data = "".join(token[0])
            found_start = token[1]
            found_end = token[2]
            self.assertEqual(
                found_data,
                exp_data,
                msg=("wrong data for token {0}, expected: '{1}', "
                     "found: '{2}' ").format(num, exp_data, found_data),
            )
            self.assertEqual(
                found_start,
                exp_start,
                msg=("wrong start frame for token {0}, expected: {1}, "
                     "found: {2} ").format(num, exp_start, found_start),
            )
            self.assertEqual(
                found_end,
                exp_end,
                msg=("wrong end frame for token {0}, expected: {1}, "
                     "found: {2} ").format(num, exp_end, found_end),
            )
try:

    # We set the `record` argument to True so that we can rewind the source
    asource = ADSFactory.ads(
        filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)

    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=65)

    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
    # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
    # max_continuous_silence=30 : maximum length of a tolerated  silence within a valid audio activity is 30 * 30 == 300 ms
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=20,
                                max_length=400,
                                max_continuous_silence=30)

    asource.open()
    tokens = tokenizer.tokenize(asource)

    # Play detected regions back
    player = player_for(asource)

    # Rewind and read the whole signal
    asource.rewind()
    original_signal = []

    while True:
        w = asource.read()
        if w is None:
示例#41
0
    def test_min_length_4_init_max_length_5(self):
        """min_length=4 / max_length=5: long activity runs are chopped
        into tokens of at most 5 frames; verify the four expected tokens.

        Bug fix: the original assertions carried copy-pasted failure
        messages with wrong token numbers and wrong expected values
        ("token 1"/"expected: 1/7/17/22" for tokens 2-4).  The messages
        are now derived from the asserted values themselves, so they
        cannot drift out of sync again.
        """

        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=4,
            max_length=5,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa")
        #                      ^   ^^   ^    ^   ^     ^   ^
        #                      18 2223  27   32  36    42  46

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            4,
            msg="wrong number of tokens, expected: 4, found: {0} ".format(
                len(tokens)),
        )

        # (expected data, expected start frame, expected end frame)
        expectations = [
            ("AAAAA", 18, 22),
            ("AAAaa", 23, 27),
            ("AAAAA", 32, 36),
            ("AAaaA", 42, 46),
        ]
        for num, (token, (exp_data, exp_start, exp_end)) in enumerate(
                zip(tokens, expectations), start=1):
            found_data = "".join(token[0])
            self.assertEqual(
                found_data,
                exp_data,
                msg=("wrong data for token {0}, expected: '{1}', "
                     "found: '{2}' ").format(num, exp_data, found_data),
            )
            self.assertEqual(
                token[1],
                exp_start,
                msg=("wrong start frame for token {0}, expected: {1}, "
                     "found: {2} ").format(num, exp_start, token[1]),
            )
            self.assertEqual(
                token[2],
                exp_end,
                msg=("wrong end frame for token {0}, expected: {1}, "
                     "found: {2} ").format(num, exp_end, token[2]),
            )
'''
# record = True so that we'll be able to rewind the source.
# max_time = 10: read 10 seconds from the microphone
asource = ADSFactory.ads(record=True)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)

player = player_for(asource)
asource.open()

tokenizer.tokenize(asource, callback=echo)
'''
# Live microphone tokenization: 16 kHz, 16-bit, mono, read in 10 ms
# analysis windows (block_dur=0.01).
asource = ADSFactory.ads(sampling_rate=16000,
                         sample_width=2,
                         channels=1,
                         frames_per_buffer=128,
                         record=False,
                         block_dur=0.01)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                 energy_threshold=50)
# NOTE(review): unlike every other StreamTokenizer call in this file,
# max_length is not passed here — confirm the auditok version in use
# provides a default for it, otherwise this call raises TypeError.
tokenizer = StreamTokenizer(validator=validator,
                            min_length=100,
                            max_continuous_silence=500)

asource.open()

# `echo` is defined elsewhere in this file; it is invoked for each
# detected token (presumably with (data, start, end) — verify against
# the tokenizer's callback contract).
tokenizer.tokenize(asource, callback=echo)
示例#43
0

if __name__ == "__main__":

    # Delete all .wav files left in the working directory by previous runs.
    for file in glob.glob("*.wav"):
        remove(file)

    # Command utilities:
    # NOTE(review): `handler` is not referenced in this block — presumably
    # used by listen_for_commands elsewhere in the file; verify.
    handler = CommandHandler()

    # Auditok utilities: microphone source at 16 kHz, 16-bit mono, read
    # in 10 ms analysis windows (block_dur=0.01).
    asource = ADSFactory.ads(sampling_rate=16000,
                             sample_width=2,
                             channels=1,
                             frames_per_buffer=512,
                             record=False,
                             block_dur=0.01)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)
    # With 10 ms windows: min command 1 s, max 5 s, max in-token
    # silence 300 ms.
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=100,
                                max_length=500,
                                max_continuous_silence=30)

    asource.open()

    # Main program loop
    log("Waiting for a command...")
    # Blocks here; each detected token is handed to listen_for_commands.
    tokenizer.tokenize(asource, callback=listen_for_commands)