def transcribe_audio(self, stereo_path, channels_to_process):

        if not os.path.isfile(stereo_path):
            raise Exception("Audio file does not exist: {0}".format(stereo_path))

        data = self.split_to_mono(stereo_path)

        a_leg = data['a_leg']
        b_leg = data['b_leg']

        data['a_leg'] = None
        data['b_leg'] = None

        validator = AudioEnergyValidator(sample_width=data['frame_width'], energy_threshold=45)
        trimmer = StreamTokenizer(validator,
                                  min_length=self.min_segment_length,
                                  max_length=self.max_segment_length,
                                  max_continuous_silence=self.max_continuous_silence,
                                  mode=StreamTokenizer.DROP_TAILING_SILENCE)

        segments = []
        if channels_to_process in ['A', 'AB']:
            a_source = ADSFactory.ads(audio_source=a_leg, record=True, block_size=int(data['frame_rate'] / self.divisor))
            a_source.open()
            trimmer.tokenize(a_source, callback=lambda data, start, end: segments.append(("A", data, start, end)))

        if channels_to_process in ['B', 'AB']:
            b_source = ADSFactory.ads(audio_source=b_leg, record=True, block_size=int(data['frame_rate'] / self.divisor))
            b_source.open()
            trimmer.tokenize(b_source, callback=lambda data, start, end: segments.append(("B", data, start, end)))

        segments = sorted(segments, key=lambda x: x[3])
        self.batch(segments, data['duration'], data['frame_rate'], data['frame_width'], data['nchannels'])
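# The method above relies on a split_to_mono helper that is not shown. A minimal sketch of
# what it is assumed to return is given below, written as a standalone function; the field
# names and the left/right-to-A/B mapping are assumptions, not the original implementation.
from auditok import BufferAudioSource
import audioop
import wave


def split_to_mono(stereo_path):
    with wave.open(stereo_path, "rb") as wf:
        nframes = wf.getnframes()
        width = wf.getsampwidth()
        rate = wf.getframerate()
        frames = wf.readframes(nframes)
    a = audioop.tomono(frames, width, 1, 0)  # keep the left (A) channel
    b = audioop.tomono(frames, width, 0, 1)  # keep the right (B) channel
    return {
        'a_leg': BufferAudioSource(a, rate, width, 1),
        'b_leg': BufferAudioSource(b, rate, width, 1),
        'frame_rate': rate,
        'frame_width': width,
        'nchannels': 1,
        'duration': nframes / float(rate),
    }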
 def test_block_size(self):
     ads = ADSFactory.ads(audio_source=self.audio_source, block_size=512)
     size = ads.get_block_size()
     self.assertEqual(size, 512, "Wrong block_size, expected: 512, found: {0}".format(size))
     
     # with alias keyword
     ads = ADSFactory.ads(audio_source=self.audio_source, bs=160)
     size = ads.get_block_size()
     self.assertEqual(size, 160, "Wrong block_size, expected: 160, found: {0}".format(size))
 def test_block_duration(self):
     
     ads = ADSFactory.ads(audio_source=self.audio_source, block_dur=0.01) # 10 ms
     size = ads.get_block_size()
     self.assertEqual(size, 160, "Wrong block_size, expected: 160, found: {0}".format(size))
     
     # with alias keyword
     ads = ADSFactory.ads(audio_source=self.audio_source, bd=0.025) # 25 ms
     size = ads.get_block_size()
     self.assertEqual(size, 400, "Wrong block_size, expected: 400, found: {0}".format(size))
 def test_hop_duration(self):
     
     ads = ADSFactory.ads(audio_source=self.audio_source, block_dur=0.02, hop_dur=0.01) # 10 ms
     size = ads.hop_size
     self.assertEqual(size, 160, "Wrong hop_size, expected: 160, found: {0}".format(size))
     
     # with alias keyword
     ads = ADSFactory.ads(audio_source=self.audio_source, bd=0.025, hop_dur=0.015) # 15 ms
     size = ads.hop_size
     self.assertEqual(size, 240, "Wrong block_size, expected: 240, found: {0}".format(size))    
    def test_block_size(self):
        ads = ADSFactory.ads(audio_source=self.audio_source, block_size=512)
        size = ads.get_block_size()
        self.assertEqual(
            size, 512,
            "Wrong block_size, expected: 512, found: {0}".format(size))

        # with alias keyword
        ads = ADSFactory.ads(audio_source=self.audio_source, bs=160)
        size = ads.get_block_size()
        self.assertEqual(
            size, 160,
            "Wrong block_size, expected: 160, found: {0}".format(size))
    def test_block_duration(self):

        ads = ADSFactory.ads(audio_source=self.audio_source,
                             block_dur=0.01)  # 10 ms
        size = ads.get_block_size()
        self.assertEqual(
            size, 160,
            "Wrong block_size, expected: 160, found: {0}".format(size))

        # with alias keyword
        ads = ADSFactory.ads(audio_source=self.audio_source, bd=0.025)  # 25 ms
        size = ads.get_block_size()
        self.assertEqual(
            size, 400,
            "Wrong block_size, expected: 400, found: {0}".format(size))
from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer
from scipy.io import wavfile
import matplotlib.pyplot as plt


def split(filename='g1238-20181214-081712-1544750232.37681.wav'):
    sr, samples = wavfile.read(filename=filename, mmap=True)
    #print(len(samples))
    plt.plot(samples)
    asource = ADSFactory.ads(filename=filename, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)
    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
    # min_length=100 : minimum length of a valid audio activity is 100 * 10 == 1000 ms
    # max_length=500 : maximum length of a valid audio activity is 500 * 10 == 5000 ms == 5 seconds
    # max_continuous_silence=50 : maximum length of a tolerated silence within a valid audio activity is 50 * 10 == 500 ms
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=100,
                                max_length=500,
                                max_continuous_silence=50)
    asource.open()
    tokens = tokenizer.tokenize(asource)
    stack = []
    durations = []
    for i, t in enumerate(tokens):
        #print("Token [{0}] starts at {1} and ends at {2}".format(i+1, t[1], t[2]))
        # token boundaries are in analysis-window frames: 1 frame == 80 samples (10 ms at 8000 Hz)
        stack.append([t[1] * 80, t[2] * 80])
        durations.append((t[2] * 80 - t[1] * 80) / 8000)
        wavfile.write('token_' + str(i) + '.wav', sr,
                      samples[t[1] * 80:t[2] * 80])  # write each token to its own file
    asource.close()
    print(durations)
    return stack
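# A small helper, offered as a sketch (the name is illustrative, not part of the example
# above), that generalizes the hard-coded "* 80" and "/ 8000" arithmetic: it converts
# tokenizer frame indices into sample indices and a duration in seconds for any sampling
# rate and analysis-window duration.
def frames_to_samples_and_seconds(start_frame, end_frame, sampling_rate=8000, window_dur=0.01):
    samples_per_frame = int(sampling_rate * window_dur)  # 80 samples for a 10 ms window at 8 kHz
    start_sample = start_frame * samples_per_frame
    end_sample = end_frame * samples_per_frame
    duration_s = (end_sample - start_sample) / float(sampling_rate)
    return start_sample, end_sample, duration_s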
 def test_data_buffer_alias(self):
     ads = ADSFactory.ads(db=self.signal,
                          sampling_rate=16,
                          sample_width=2,
                          channels=1)
     self.assertEqual(ads.get_audio_source().get_data_buffer(), self.signal,
                      "Wrong value for data buffer")
    def test_Recorder_Deco_rewind_and_read(self):
        ads = ADSFactory.ads(audio_source=self.audio_source,
                             record=True,
                             block_size=320)

        ads.open()
        for i in range(10):
            ads.read()

        ads.rewind()

        # read all available data after rewind
        ads_data = []
        while True:
            block = ads.read()
            if block is None:
                break
            ads_data.append(block)
        ads.close()
        ads_data = b''.join(ads_data)

        audio_source = WaveAudioSource(
            filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
        audio_source.open()
        audio_source_data = audio_source.read(320 * 10)
        audio_source.close()

        self.assertEqual(ads_data, audio_source_data,
                         "Unexpected data read from RecorderADS")
 def test_Limiter_Overlap_Deco_read(self):    
     
     block_size = 256
     hop_size = 200
     
     ads = ADSFactory.ads(audio_source=self.audio_source, max_time=0.50, block_size=block_size, hop_size=hop_size)
     
     # Read all available data overlapping blocks
     ads.open()
     ads_data = []
     while True:
         block = ads.read()
         if block is None:
             break
         ads_data.append(block)
     ads.close()
     
     # Read all data from file and build a BufferAudioSource
     fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
     wave_data = fp.readframes(fp.getnframes())
     fp.close()
     audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
                                      ads.get_sample_width(), ads.get_channels())
     audio_source.open()
     
     # Compare all blocks read from OverlapADS to those read
     # from an audio source with a manual set_position
     for i,block in enumerate(ads_data):            
         tmp = audio_source.read(block_size)
         
         self.assertEqual(block, tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
         
         audio_source.set_position((i+1) * hop_size)
     
     audio_source.close()
    def test_sample_width(self):
        ads = ADSFactory.ads(audio_source=self.audio_source)

        swidth = ads.get_sample_width()
        self.assertEqual(
            swidth, 2,
            "Wrong sample width, expected: 2, found: {0}".format(swidth))
Example #13
    def test_Limiter_Deco_read_limit(self):
        # read a maximum of 1.191 seconds from audio source
        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.191)
        total_samples = round(ads.sampling_rate * 1.191)
        nb_full_blocks, last_block_size = divmod(total_samples, ads.block_size)
        total_samples_with_overlap = (nb_full_blocks * ads.block_size +
                                      last_block_size)
        expected_read_bytes = (total_samples_with_overlap * ads.sw *
                               ads.channels)

        total_read = 0
        ads.open()
        i = 0
        while True:
            block = ads.read()
            if block is None:
                break
            i += 1
            total_read += len(block)

        ads.close()
        err_msg = "Wrong data length read from LimiterADS, expected: {0}, "
        err_msg += "found: {1}"
        self.assertEqual(
            total_read,
            expected_read_bytes,
            err_msg.format(expected_read_bytes, total_read),
        )
 def test_Recorder_Deco_rewind_and_read(self):
     ads = ADSFactory.ads(audio_source=self.audio_source, record=True, block_size = 320)
     
     ads.open()
     for i in range(10):
         ads.read()
         
     ads.rewind()
     
     # read all available data after rewind
     ads_data = []
     while True:
         block = ads.read()
         if block is None:
             break
         ads_data.append(block)
     ads.close()
     ads_data = b''.join(ads_data)    
                 
     audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
     audio_source.open()
     audio_source_data = audio_source.read(320 * 10)
     audio_source.close()
     
     self.assertEqual(ads_data, audio_source_data, "Unexpected data read from RecorderADS")
 def test_Recorder_Overlap_Deco_is_rewindable(self):
     ads = ADSFactory.ads(audio_source=self.audio_source,
                          block_size=320,
                          hop_size=160,
                          record=True)
     self.assertTrue(ads.is_rewindable(),
                     "RecorderADS.is_rewindable should return True")
 def test_hop_duration_alias(self):
     
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sample_width=2, channels=1, bd=0.75, hd=0.5 )
     size = ads.hop_size
     self.assertEqual(size, 8, "Wrong block_size using bs alias, expected: 8, found: {0}".format(size))
     self.assertIsInstance(ads, ADSFactory.OverlapADS, "ads expected to an ADSFactory.OverlapADS object")
    def test_sampling_rate(self):
        ads = ADSFactory.ads(audio_source=self.audio_source)

        srate = ads.get_sampling_rate()
        self.assertEqual(
            srate, 16000,
            "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
Example #18
 def test_default_block_size(self):
     ads = ADSFactory.ads(audio_source=self.audio_source)
     size = ads.block_size
     self.assertEqual(
         size,
         160,
         "Wrong default block_size, expected: 160, found: {0}".format(size),
     )
    def test_channels(self):
        ads = ADSFactory.ads(audio_source=self.audio_source)

        channels = ads.get_channels()
        self.assertEqual(
            channels, 1,
            "Wrong number of channels, expected: 1, found: {0}".format(
                channels))
 def test_sample_width_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sampling_rate=16,
                          sw=2,
                          channels=1)
     swidth = ads.get_sample_width()
     self.assertEqual(
         swidth, 2,
         "Wrong sample width, expected: 2, found: {0}".format(swidth))
 def test_sampling_rate_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sr=16,
                          sample_width=2,
                          channels=1)
     srate = ads.get_sampling_rate()
     self.assertEqual(
         srate, 16,
         "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
Example #22
 def setUp(self):
     self.signal = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
     self.ads = ADSFactory.ads(
         data_buffer=self.signal,
         sampling_rate=16,
         sample_width=2,
         channels=1,
         block_size=4,
     )
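# The alias tests in this file build sources from the same 32-byte signal; a quick worked
# check of the numbers they assert (plain arithmetic only, offered as a sketch):
signal_bytes = 32                # len(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345")
samples = signal_bytes // 2      # 2-byte samples      -> 16 samples
assert samples / 16 == 1.0       # at 16 Hz            -> exactly 1 second of audio
assert int(0.75 * 16) == 12      # block_dur=0.75 s    -> block_size of 12 samples
assert int(0.5 * 16) == 8        # hop_dur=0.5 s       -> hop_size of 8 samples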
 def test_max_time_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sampling_rate=16,
                          sample_width=2,
                          channels=1,
                          mt=10)
     self.assertIsInstance(
         ads, ADSFactory.LimiterADS,
         "ads expected to an ADSFactory.LimiterADS object")
 def test_record_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sampling_rate=16,
                          sample_width=2,
                          channels=1,
                          rec=True)
     self.assertIsInstance(
         ads, ADSFactory.RecorderADS,
         "ads expected to an ADSFactory.RecorderADS object")
    def test_hop_duration(self):

        ads = ADSFactory.ads(audio_source=self.audio_source,
                             block_dur=0.02,
                             hop_dur=0.01)  # 10 ms
        size = ads.hop_size
        self.assertEqual(
            size, 160,
            "Wrong hop_size, expected: 160, found: {0}".format(size))

        # with alias keyword
        ads = ADSFactory.ads(audio_source=self.audio_source,
                             bd=0.025,
                             hop_dur=0.015)  # 15 ms
        size = ads.hop_size
        self.assertEqual(
            size, 240,
            "Wrong block_size, expected: 240, found: {0}".format(size))
    def test_Limiter_Deco_type(self):
        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1)

        self.assertIsInstance(
            ads,
            ADSFactory.LimiterADS,
            msg=
            "wrong type for ads object, expected: 'ADSFactory.LimiterADS', found: {0}"
            .format(type(ads)))
    def test_Recorder_Deco_type(self):
        ads = ADSFactory.ads(audio_source=self.audio_source, record=True)

        self.assertIsInstance(
            ads,
            ADSFactory.RecorderADS,
            msg=
            "wrong type for ads object, expected: 'ADSFactory.RecorderADS', found: {0}"
            .format(type(ads)))
 def test_Recorder_Overlap_Deco_type(self):
     ads = ADSFactory.ads(audio_source=self.audio_source, block_size=256, hop_size=128, record=True)
     
     self.assertIsInstance(ads, ADSFactory.OverlapADS,
                         msg="wrong type for ads object, expected: 'ADSFactory.OverlapADS', found: {0}".format(type(ads)))
      
     
     self.assertIsInstance(ads.ads, ADSFactory.RecorderADS,
                           msg="wrong type for ads object, expected: 'ADSFactory.RecorderADS', found: {0}".format(type(ads)))
 def test_channels_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sampling_rate=16,
                          sample_width=2,
                          ch=1)
     channels = ads.get_channels()
     self.assertEqual(
         channels, 1,
         "Wrong number of channels, expected: 1, found: {0}".format(
             channels))
    def test_ADS_type(self):

        ads = ADSFactory.ads(audio_source=self.audio_source)

        self.assertIsInstance(
            ads,
            ADSFactory.AudioDataSource,
            msg=
            "wrong type for ads object, expected: 'ADSFactory.AudioDataSource', found: {0}"
            .format(type(ads)))
Example #31
    def calibrate(self):
        '''
        This method calibrates the MinMaxScaler, self.scaler, by capturing 10 seconds
        of audio and applying MinMaxScaler fit method.
        See sklearn.preprocessing.MinMaxScaler for details.
        
        This is redundant, scaling is not necessary.
        
        '''
        a = input(
            "Calibrate normalisation, press return then make noises from your mouth hole."
        )
        if self.audioPath is None:
            asource = ADSFactory.ads(sampling_rate=self.sr, max_time=10)
        else:
            asource = ADSFactory.ads(filename=self.audioPath,
                                     sampling_rate=self.sr,
                                     max_time=10)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(),
            energy_threshold=self.energy)

        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=self.min_len,
                                    max_length=self.max_len,
                                    max_continuous_silence=self.max_con_si)

        def calib_callback(data, start, end):
            audio = np.frombuffer(data[0], dtype=np.int8)
            self.scaler.fit_transform(np.swapaxes(np.asarray([audio]), 0, 1))
            print("Audio sample found {0}--{1}".format(start, end))

        asource.open()

        tokenizer.tokenize(asource, callback=calib_callback)
        print "Scaler paramaters found: min: {0} max: {1}".format(
            self.scaler.data_min_, self.scaler.data_max_)

        print "calibration done"
        self.mini = self.scaler.data_min_
        self.maxi = self.scaler.data_max_
Example #32
import sys

from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer


def getSplitAudioDurationListBetweenSilence(fileName, eachAudioLen, silencePeriod, energyThreshold=55):
    tokens = []
    try:
        # record=False: the source is only read once here, so there is no need to rewind it
        asource = ADSFactory.ads(filename=fileName, record=False)

        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energyThreshold)

        # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
        # min_length=400 : minimum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
        # max_length=eachAudioLen*100 : maximum length of a valid audio activity is eachAudioLen seconds
        # max_continuous_silence=silencePeriod*100 : maximum tolerated silence within a valid audio activity is silencePeriod seconds
        tokenizer = StreamTokenizer(validator=validator, min_length=400, max_length=eachAudioLen*100, max_continuous_silence=silencePeriod*100)

        asource.open()
        tokens = tokenizer.tokenize(asource)

        # Play detected regions back
        #player = player_for(asource)

        # Rewind and read the whole signal
        #asource.rewind()
        #original_signal = []

        #while True:
        #    w = asource.read()
        #    if w is None:
        #        break
        #    original_signal.append(w)


        #original_signal = b''.join(original_signal)
        #player.play(original_signal)

        #print("\n ** playing detected regions...\n")
        #for i,t in enumerate(tokens):
        #    print("Token [{0}] starts at {1} and ends at {2}".format(i+1, t[1], t[2]))
            #data = b''.join(t[0])
            #player.play(data)

        #assert len(tokens) == 8

        asource.close()
        #player.stop()
    except KeyboardInterrupt:

        #player.stop()
        asource.close()
        #sys.exit(0)

    except Exception as e:

        sys.stderr.write(str(e) + "\n")
        #sys.exit(1)
    return tokens
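# A hedged usage sketch for the function above ("call_recording.wav" is a placeholder file
# name, not one shipped with the example). With the default 10 ms analysis window, durations
# given in seconds are multiplied by 100 to obtain frame counts, so eachAudioLen=10 caps a
# segment at 10 seconds and silencePeriod=1 tolerates up to 1 second of silence inside a
# segment; min_length is fixed at 400 frames (4 seconds) in the function.
if __name__ == "__main__":
    tokens = getSplitAudioDurationListBetweenSilence("call_recording.wav",
                                                     eachAudioLen=10,
                                                     silencePeriod=1)
    print(len(tokens), "segments detected")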
Example #33
 def test_record_alias(self):
     ads = ADSFactory.ads(
         data_buffer=self.signal,
         sampling_rate=16,
         sample_width=2,
         channels=1,
         rec=True,
         block_dur=0.5,
     )
     self.assertTrue(ads.rewindable,
                     "AudioDataSource.rewindable expected to be True")
 def test_block_size_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sampling_rate=16,
                          sample_width=2,
                          channels=1,
                          bs=8)
     size = ads.get_block_size()
     self.assertEqual(
         size, 8,
         "Wrong block_size using bs alias, expected: 8, found: {0}".format(
             size))
Example #35
    def test_ADS_type(self):

        ads = ADSFactory.ads(audio_source=self.audio_source)

        err_msg = "wrong type for ads object, expected: 'AudioDataSource', "
        err_msg += "found: {0}"
        self.assertIsInstance(
            ads,
            AudioDataSource,
            err_msg.format(type(ads)),
        )
 def test_Recorder_Deco_rewind(self):
     ads = ADSFactory.ads(audio_source=self.audio_source, record=True, block_size = 320)
     
     ads.open()
     ads.read()
     ads.rewind()
     
     
     self.assertIsInstance(ads.get_audio_source(), BufferAudioSource,
                           "After rewind, RecorderADS.get_audio_source should be an instance of BufferAudioSource")
     ads.close()
    def test_Overlap_Deco_type(self):
        # an OverlapADS is obtained if a valid hop_size is given
        ads = ADSFactory.ads(audio_source=self.audio_source,
                             block_size=256,
                             hop_size=128)

        self.assertIsInstance(
            ads,
            ADSFactory.OverlapADS,
            msg=
            "wrong type for ads object, expected: 'ADSFactory.OverlapADS', found: {0}"
            .format(type(ads)))
 def test_block_duration_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal,
                          sampling_rate=16,
                          sample_width=2,
                          channels=1,
                          bd=0.75)
      # 0.75 s at 16 Hz = 0.75 * 16 = 12 samples
      size = ads.get_block_size()
      self.assertEqual(
          size, 12,
          "Wrong block_size set with the block_dur alias 'bd', expected: 12, found: {0}"
          .format(size))
    def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_limit(self):

        # Use arbitrary valid block_size and hop_size
        block_size = 1000
        hop_size = 200

        ads = ADSFactory.ads(audio_source=self.audio_source,
                             max_time=1.317,
                             block_size=block_size,
                             hop_size=hop_size,
                             record=True)

        # Limiter + Overlap decos => read N blocks of actual data:
        # one block of size block_size and
        # N - 1 blocks of size hop_size.
        # The total size of the read data might be slightly greater
        # than the required size calculated from max_time.

        # theoretical size to reach
        expected_size = int(ads.get_sampling_rate() * 1.317) * \
                       ads.get_sample_width() * ads.get_channels()

        # minus block_size
        expected_size -= (block_size * ads.get_sample_width() *
                          ads.get_channels())

        # how much data are required to get N - 1 blocks of size hop_size
        hop_size_bytes = hop_size * ads.get_sample_width() * ads.get_channels()
        r = expected_size % hop_size_bytes
        if r > 0:
            expected_size += hop_size_bytes - r

        expected_size += block_size * ads.get_sample_width(
        ) * ads.get_channels()

        cache_size = (block_size -
                      hop_size) * ads.get_sample_width() * ads.get_channels()
        total_read = cache_size

        ads.open()
        i = 0
        while True:
            block = ads.read()
            if block is None:
                break
            i += 1
            total_read += len(block) - cache_size

        ads.close()
        self.assertEqual(
            total_read, expected_size,
            "Wrong data length read from LimiterADS, expected: {0}, found: {1}"
            .format(expected_size, total_read))
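# A standalone sketch of the byte-count derivation spelled out in the comments above (the
# helper name is an illustration, not part of the test suite): round the limited duration
# up to one full block plus a whole number of hops, all expressed in bytes.
def expected_overlap_read_bytes(max_time, sampling_rate, sample_width, channels,
                                block_size, hop_size):
    bytes_per_sample = sample_width * channels
    target = int(sampling_rate * max_time) * bytes_per_sample   # theoretical size to reach
    remainder = target - block_size * bytes_per_sample          # data left after the first block
    hop_bytes = hop_size * bytes_per_sample
    if remainder % hop_bytes:                                   # round up to a whole number of hops
        remainder += hop_bytes - remainder % hop_bytes
    return remainder + block_size * bytes_per_sample

# With the test's parameters (max_time=1.317, 16000 Hz, 2-byte mono samples,
# block_size=1000, hop_size=200) this reproduces the expected_size computed above.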
 def test_read(self):
     ads = ADSFactory.ads(audio_source=self.audio_source, block_size = 256)
     
     ads.open()
     ads_data = ads.read()
     ads.close()
     
     audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
     audio_source.open()
     audio_source_data = audio_source.read(256)
     audio_source.close()
     
     self.assertEqual(ads_data, audio_source_data, "Unexpected data read from ads")
 def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_limit(self):
     
     # Use arbitrary valid block_size and hop_size
     block_size = 1000
     hop_size = 200
     
     ads = ADSFactory.ads(audio_source=self.audio_source, max_time = 1.317, block_size=block_size, hop_size=hop_size, record=True)
     
     # Limiter + Overlap decos => read N blocks of actual data:
     # one block of size block_size and
     # N - 1 blocks of size hop_size.
     # The total size of the read data might be slightly greater
     # than the required size calculated from max_time.
     
     # theoretical size to reach          
     expected_size = int(ads.get_sampling_rate() * 1.317) * \
                    ads.get_sample_width() * ads.get_channels()
     
     # minus block_size
     expected_size -= (block_size * ads.get_sample_width() * ads.get_channels())
     
     # how much data are required to get N - 1 blocks of size hop_size
     hop_size_bytes = hop_size * ads.get_sample_width() * ads.get_channels()
     r = expected_size % hop_size_bytes
     if r > 0:
         expected_size += hop_size_bytes - r
     
     expected_size += block_size * ads.get_sample_width() * ads.get_channels()
     
     cache_size = (block_size - hop_size) * ads.get_sample_width() * ads.get_channels()
     total_read = cache_size
     
     ads.open()
     i = 0
     while True:
         block = ads.read()
         if block is None:
             break
         i += 1
         total_read += len(block) - cache_size
     
     ads.close()
     self.assertEqual(total_read, expected_size, "Wrong data length read from LimiterADS, expected: {0}, found: {1}".format(expected_size, total_read))
 def test_Limiter_Deco_read(self):
     # read a maximum of 0.75 seconds from audio source
     ads = ADSFactory.ads(audio_source=self.audio_source, max_time=0.75)
     
     ads_data = []
     ads.open()
     while True:
         block = ads.read()
         if block is None:
             break
         ads_data.append(block)
     ads.close()
     ads_data = b''.join(ads_data)    
                 
     audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
     audio_source.open()
     audio_source_data = audio_source.read(int(16000 * 0.75))
     audio_source.close()
     
     self.assertEqual(ads_data, audio_source_data, "Unexpected data read from LimiterADS")
 def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_alias(self):
     
     # Use arbitrary valid block_size and hop_size
     block_size = 5
     hop_size = 4
     
     ads = ADSFactory.ads(db=self.signal, sr=16,
                          sw=2, ch=1, mt = 0.80,
                          bs=block_size, hs=hop_size,
                          rec=True)
     
     # Read all available data overlapping blocks
     ads.open()
     i = 0
     while True:
         block = ads.read()
         if block is None:
             break
         i += 1
     
     ads.rewind()
     
     # Build a BufferAudioSource
     audio_source = BufferAudioSource(self.signal, ads.get_sampling_rate(),
                     ads.get_sample_width(), ads.get_channels())
     audio_source.open()
     
     # Compare all blocks read from OverlapADS to those read
     # from an audio source with a manual set_position
     for j in range(i):
         
         tmp = audio_source.read(block_size)
         
         block = ads.read()
         
         self.assertEqual(block, tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
         audio_source.set_position((j+1) * hop_size)
     
     ads.close()
     audio_source.close()
 def test_Limiter_Recorder_Overlap_Deco_rewind_and_read(self):
     
     # Use arbitrary valid block_size and hop_size
     block_size = 1600
     hop_size = 400
     
     ads = ADSFactory.ads(audio_source=self.audio_source, max_time = 1.50, block_size=block_size, hop_size=hop_size, record=True)
     
     # Read all available data overlapping blocks
     ads.open()
     i = 0
     while True:
         block = ads.read()
         if block is None:
             break
         i += 1
     
     ads.rewind()
     
     # Read all data from file and build a BufferAudioSource
     fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
     wave_data = fp.readframes(fp.getnframes())
     fp.close()
     audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
                                      ads.get_sample_width(), ads.get_channels())
     audio_source.open()
     
     # Compare all blocks read from OverlapADS to those read
     # from an audio source with a manual set_position
     for j in range(i):
         
         tmp = audio_source.read(block_size)
         
         self.assertEqual(ads.read(), tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
         audio_source.set_position((j+1) * hop_size)
     
     ads.close()
     audio_source.close()
from collections import defaultdict
from operator import attrgetter
from os import path

import pandas as pd
from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer


def _get_asr_result_whole(folder, prefix):
    asource = ADSFactory.ads(filename='./temp/{}.wav'.format(prefix), block_size=160)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)
    tokenizer = StreamTokenizer(validator=validator, min_length=300, max_length=1000, max_continuous_silence=50)
    asource.open()
    from pocketsphinx_decoder import decoder

    tokens = tokenizer.tokenize(asource)


    d = defaultdict(list)


    past = 0
    for content,start,end in tokens:
        save_audio_data(data=b''.join(content), filename='tmp.wav', filetype='wav', sr=asource.get_sampling_rate(),sw = asource.get_sample_width(),ch = asource.get_channels())
        decoder.start_utt()
        decoder.process_raw(open('tmp.wav','rb').read(),False,False)
        decoder.end_utt()
        seg = list(decoder.seg())
        print(' '.join([s.word for s in seg]))
        def add_feature(name,add=None):
            if add is None:
                d[name].extend(list(map(attrgetter(name),seg)))
            else:
                d[name].extend([attrgetter(name)(x)+add for x in seg])
        add_feature('start_frame',past)
        add_feature('end_frame',past)
        add_feature('word')
        add_feature('ascore')
        add_feature('lscore')
        add_feature('lback')
        add_feature('prob')
        past += len(content)
        df = pd.DataFrame(d)
        df = df[['start_frame','end_frame','ascore','lscore','lback','prob','word']]
        df.to_csv(path.join(folder ,'{}.csv'.format(prefix)), index=None)
 def test_Limiter_Deco_read_limit(self):
     # read a maximum of 1.191 seconds from audio source
     ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.191)
     
     # desired duration in bytes is obtained by:
     # max_time * sampling_rate * sample_width * nb_channels
     # The Limiter deco tries to read a total quantity of data as close as
     # possible to the desired duration in bytes.
     # It reads N blocks of size block_size where:
     # (N - 1) * block_size < desired duration, AND
     # N * block_size >= desired duration
     
     # theoretical size to reach          
     expected_size = int(ads.get_sampling_rate() * 1.191) * \
                    ads.get_sample_width() * ads.get_channels()
     
     
     # how much data are required to get N blocks of size block_size
     block_size_bytes = ads.get_block_size() * ads.get_sample_width() * ads.get_channels()
     r = expected_size % block_size_bytes
     if r > 0:
         expected_size += block_size_bytes - r
     
     total_read = 0
     ads.open()
     i = 0
     while True:
         block = ads.read()
         if block is None:
             break
         i += 1
         total_read += len(block)
     
     ads.close()
         
     self.assertEqual(total_read, expected_size, "Wrong data length read from LimiterADS, expected: {0}, found: {1}".format(expected_size, total_read))
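# A compact sketch of the rounding rule used in the test above (the helper name is an
# illustration, not part of the test suite): the limiter reads whole blocks, so the expected
# amount of data is the desired duration in bytes rounded up to a multiple of the block size
# in bytes.
def expected_limiter_read_bytes(max_time, sampling_rate, sample_width, channels, block_size):
    bytes_per_sample = sample_width * channels
    desired = int(sampling_rate * max_time) * bytes_per_sample
    block_bytes = block_size * bytes_per_sample
    if desired % block_bytes:
        desired += block_bytes - desired % block_bytes   # round up to N full blocks
    return desired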
 def test_sample_width(self):
     ads = ADSFactory.ads(audio_source=self.audio_source)
     
     swidth = ads.get_sample_width()
     self.assertEqual(swidth, 2, "Wrong sample width, expected: 2, found: {0}".format(swidth))
 def test_sample_width_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sw=2, channels=1)
     swidth = ads.get_sample_width()
     self.assertEqual(swidth, 2, "Wrong sample width, expected: 2, found: {0}".format(swidth))
 def test_sampling_rate_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sr=16,
                          sample_width=2, channels=1)
     srate = ads.get_sampling_rate()
     self.assertEqual(srate, 16, "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
 def test_block_size_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sample_width=2, channels=1, bs=8)
     size = ads.get_block_size()
     self.assertEqual(size, 8, "Wrong block_size using bs alias, expected: 8, found: {0}".format(size))
 def test_channels_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sample_width=2, ch=1)
     channels = ads.get_channels()
     self.assertEqual(channels, 1, "Wrong number of channels, expected: 1, found: {0}".format(channels))
 def test_filename_alias(self):
     ads = ADSFactory.ads(fn=dataset.one_to_six_arabic_16000_mono_bc_noise)
 def test_block_duration_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sample_width=2, channels=1, bd=0.75)
     # 0.75 s at 16 Hz = 0.75 * 16 = 12 samples
     size = ads.get_block_size()
     self.assertEqual(size, 12, "Wrong block_size set with the block_dur alias 'bd', expected: 12, found: {0}".format(size))
 def test_max_time_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sample_width=2, channels=1, mt=10)
     self.assertIsInstance(ads, ADSFactory.LimiterADS, "ads expected to an ADSFactory.LimiterADS object")
 def test_data_buffer_alias(self):
     ads = ADSFactory.ads(db=self.signal, sampling_rate=16,
                          sample_width=2, channels=1)
     self.assertEqual(ads.get_audio_source().get_data_buffer(), self.signal, "Wrong value for data buffer")
 def test_sampling_rate(self):
     ads = ADSFactory.ads(audio_source=self.audio_source)
     
     srate = ads.get_sampling_rate()
     self.assertEqual(srate, 16000, "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
 def test_record_alias(self):
     ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                          sample_width=2, channels=1, rec=True)
     self.assertIsInstance(ads, ADSFactory.RecorderADS, "ads expected to an ADSFactory.RecorderADS object")
 def test_channels(self):
     ads = ADSFactory.ads(audio_source=self.audio_source)
     
     channels = ads.get_channels()
     self.assertEqual(channels, 1, "Wrong number of channels, expected: 1, found: {0}".format(channels))
Example #59
import sys

from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for

try:

    energy_threshold = 45
    duration = 10  # seconds

    if len(sys.argv) > 1:
        energy_threshold = float(sys.argv[1])

    if len(sys.argv) > 2:
        duration = float(sys.argv[2])

    # record = True so that we'll be able to rewind the source.
    # max_time = 10: read 10 seconds from the microphone
    asource = ADSFactory.ads(record=True, max_time=duration)

    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energy_threshold)
    tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)

    player = player_for(asource)

    def echo(data, start, end):
        print("Acoustic activity at: {0}--{1}".format(start, end))
        player.play(b"".join(data))

    asource.open()

    print("\n  ** Make some noise (dur:{}, energy:{})...".format(duration, energy_threshold))

    tokenizer.tokenize(asource, callback=echo)

    asource.close()

except KeyboardInterrupt:

    asource.close()
    sys.exit(0)

except Exception as e:

    sys.stderr.write(str(e) + "\n")
    sys.exit(1)
"""
@author: Amine SEHILI <*****@*****.**>
September, 2015
"""

from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
import sys

try:

   # We set the `record` argument to True so that we can rewind the source
   asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)

   validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)

   # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
   # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
   # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
   # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms
   tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)

   asource.open()
   tokens = tokenizer.tokenize(asource)

   # Play detected regions back
   player = player_for(asource)

   # Rewind and read the whole signal
   asource.rewind()
   original_signal = []