Example #1
 def tts(self, text, session_begin_params):
     ret = c_int()
     sessionID = self.sdk.QTTSSessionBegin(session_begin_params, byref(ret))
     self._logger.debug('QTTSSessionBegin => sessionID: %s ret: %s'% (sessionID, ret.value))
     
     #input text
     ret = self.sdk.QTTSTextPut(sessionID, text, len(text), None)
     if const.MSP_SUCCESS != ret:
         self._logger.error("QTTSTextPut failed Error code %d.\n"%ret)
     else:
         self._logger.debug("QTTSTextPut SUCCESS=> %s"% ret)
     
     # synthesize audio
     audio_len = c_uint()
     synth_status = c_int()
     errorCode = c_int()
     lame = LameEncoder(const.RATE, const.CHANNEL, pyaudio.get_sample_size(pyaudio.paInt16))
     
     with tempfile.NamedTemporaryFile(suffix='.%s'%self.audioFormat,mode='w+b', delete=False) as f:
         audioFile = None
         if self.audioFormat == "mp3":
             audioFile = open(f.name, "wb+")
         else:
             audioFile = wave.open(f, "wb")
        # configure the channel count, sample width, and sample rate
             audioFile.setnchannels(const.CHANNEL)
             audioFile.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
             audioFile.setframerate(const.RATE)
     
         self._logger.debug('QTTSAudioGet => ')
         while True:
             self.sdk.QTTSAudioGet.restype = POINTER(c_ushort * (1024 * 1024))
             audio_data = self.sdk.QTTSAudioGet(sessionID, byref(audio_len), byref(synth_status), byref(errorCode))
             self._logger.debug('QTTSAudioGet => audio_len: %s synth_status: %s errorCode: %s'% (audio_len, synth_status,errorCode))
             if audio_data:
                 data = string_at(audio_data, audio_len.value)
                 if self.audioFormat == "mp3":
                     output = lame.encode(data)
                     audioFile.write(output)
                 else:
                    # write the raw wav data as binary frames to the wav file
                     audioFile.writeframes(data)
             if synth_status.value == const.MSP_TTS_FLAG_DATA_END or errorCode.value != const.MSP_SUCCESS:
                 break
             time.sleep(0.1)
             
         if self.audioFormat == "mp3":
             output = lame.flush()
             audioFile.write(output)
         audioFile.close()
         ret = self.sdk.QTTSSessionEnd(sessionID, "Normal")
         self._logger.debug('QTTSSessionEnd => ret: %s'% ret)
         f.seek(0)
         return f.name
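For context, pyaudio.get_sample_size maps a PortAudio format constant to the width of one sample in bytes, which is why it appears throughout the examples below. A minimal standalone sketch (not taken from the project above):

    import pyaudio

    # Width in bytes of a single sample for each format constant.
    assert pyaudio.get_sample_size(pyaudio.paInt16) == 2    # 16-bit signed
    assert pyaudio.get_sample_size(pyaudio.paInt32) == 4    # 32-bit signed
    assert pyaudio.get_sample_size(pyaudio.paFloat32) == 4  # 32-bit float
    assert pyaudio.get_sample_size(pyaudio.paUInt8) == 1    # 8-bit unsigned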
Example #2
    def __init__(self, recognizer, agent):
        super(Listener, self).__init__()

        self.recognizer = recognizer
        self.agent = agent
        self.queue = Queue.Queue()
        self.running = True

        self.sample_rate = 16000
        self.sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * 1
        self.channels = 1

        config = Decoder.default_config()
        config.set_string('-hmm', path.join(PS_MODEL_DIR, 'en-us/en-us'))
        config.set_string('-lm', path.join(PS_MODEL_DIR, 'en-us/en-us.lm.dmp'))
        config.set_string('-dict', path.join(DATA_DIR, 'pocketsphinx/model/en-us/victoria-en-us.dict'))
        config.set_string('-logfn', 'NUL')
        config.set_string('-keyphrase', KEY_PHRASE)
        config.set_float('-samprate', self.sample_rate)
        config.set_float('-kws_threshold', 1e-40)

        self.decoder = Decoder(config)
        self.decoder.start_utt()

        self.frames = collections.deque()
        self.listening = -1
        self.silence_start = -1
        self.last_logged_hyp = None
Example #3
    def __init__(self,
                 on_audio: FunctionType,
                 pa_instance: pyaudio.PyAudio = None,
                 **kwargs):
        """ Create a new Listener object. """

        # Set defaults for arguments
        kwargs.setdefault('format', pyaudio.paInt16)
        kwargs.setdefault('channels', 1)
        kwargs.setdefault('rate', 44100)
        self.chunk = kwargs.pop('chunk', 1024)
        self.threshold = kwargs.pop('threshold', 10.0)
        self.timeout = kwargs.pop('timeout', 1.0)
        self.sample_width = pyaudio.get_sample_size(kwargs['format'])

        # Register the function called when audio is captured
        self.on_audio = on_audio

        # Save the named arguments
        self.stream_args = kwargs

        # If we were constructed with a connection, use that,
        # Otherwise create one.
        if pa_instance is not None:
            self.connection = pa_instance
        else:
            self.connection = pyaudio.PyAudio()
        self.stream = self.connection.open(input=True, **kwargs)
Example #4
    def __init__(self):
        self.format = pyaudio.paInt16  # 16-bit int sampling
        self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)  # size of each sample
        self.SAMPLE_RATE = 16000  # sampling rate in Hertz
        self.CHUNK = 1024  # number of frames stored in each buffer

        self.stream = NaoMicStream()
Example #5
def sigToWav(data, stream, out):
    waveFile = wave.open(out, 'wb')
    waveFile.setnchannels(stream._channels)
    waveFile.setsampwidth(pyaudio.get_sample_size(stream._format))
    waveFile.setframerate(stream._rate)
    waveFile.writeframes(b''.join(data))
    waveFile.close()
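Example #5 reads the underscore-prefixed private attributes of the Stream object (_channels, _format, _rate), which may break across PyAudio versions. A sketch of an equivalent helper that takes the parameters explicitly (names are illustrative, not from the project):

    import wave
    import pyaudio

    def sig_to_wav(frames, out_path, channels, fmt, rate):
        # Same result as sigToWav above, but the stream parameters are
        # passed in rather than read from private Stream attributes.
        with wave.open(out_path, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(pyaudio.get_sample_size(fmt))
            wf.setframerate(rate)
            wf.writeframes(b''.join(frames))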
Example #6
    def __init__(self, recognizer, agent):
        super(Listener, self).__init__()

        self.recognizer = recognizer
        self.agent = agent
        self.queue = Queue.Queue()
        self.running = True

        self.sample_rate = 16000
        self.sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * 1
        self.channels = 1

        config = Decoder.default_config()
        config.set_string('-hmm', path.join(PS_MODEL_DIR, 'en-us/en-us'))
        config.set_string('-lm', path.join(PS_MODEL_DIR, 'en-us/en-us.lm.dmp'))
        config.set_string(
            '-dict',
            path.join(DATA_DIR,
                      'pocketsphinx/model/en-us/victoria-en-us.dict'))
        config.set_string('-logfn', 'NUL')
        config.set_string('-keyphrase', KEY_PHRASE)
        config.set_float('-samprate', self.sample_rate)
        config.set_float('-kws_threshold', 1e-40)

        self.decoder = Decoder(config)
        self.decoder.start_utt()

        self.frames = collections.deque()
        self.listening = -1
        self.silence_start = -1
        self.last_logged_hyp = None
Example #7
 def __init__(self, filename, channels, format, rate):
     super(WaveFile, self).__init__()
     self._frames = []
     self._wf = wave.open(filename, 'wb')
     self._wf.setnchannels(channels)
     self._wf.setsampwidth(pyaudio.get_sample_size(format))
     self._wf.setframerate(rate)
Example #8
    def on_command(self, data):
        """
        Perform actions after voice input was recorded.
        :param np.array data: The voice input data
        """
        self.recording_state = False

        # Simulate action
        time.sleep(5)

        if self.raspi_mode:
            self.light.processing()

        # TODO: For now, save the file
        filename = 'testapp' + datetime.now().strftime(
            '%Y-%m-%d_%H-%M-%S') + '.wav'
        wf = wave.open(filename, 'wb')
        wf.setnchannels(self.channels)
        wf.setsampwidth(
            pyaudio.get_sample_size(pyaudio.get_format_from_width(self.width)))
        wf.setframerate(self.sample_rate)
        wf.writeframes(b''.join(data))
        wf.close()

        self.recording = []
        self.data = np.zeros(self.feed_samples, dtype=self.format)
        self.queue.empty()
        self.recording_state = False

        if self.raspi_mode:
            self.light.off()
Example #9
    def __init__(self, wrapped_stream, format, muted=False):
        assert wrapped_stream is not None
        self.wrapped_stream = wrapped_stream
        self.muted = muted

        self.SAMPLE_WIDTH = pyaudio.get_sample_size(format)
        self.muted_buffer = b''.join([b'\x00' * self.SAMPLE_WIDTH])
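Note that b''.join([b'\x00' * self.SAMPLE_WIDTH]) is equivalent to b'\x00' * self.SAMPLE_WIDTH: one sample of digital silence. A sketch extending the same idea to a whole buffer (the constants are assumptions, not the project's):

    import pyaudio

    SAMPLE_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)  # 2 bytes
    CHANNELS = 1
    FRAMES = 1024

    # For signed-integer formats, all-zero bytes decode to amplitude 0,
    # so this buffer plays back as FRAMES frames of silence.
    silence = b'\x00' * (SAMPLE_WIDTH * CHANNELS * FRAMES)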
Example #10
File: Music.py Project: Amarang/navsa
    def __init__(self):

        self.CHUNK = 1024
        self.RATE = 16000
        self.FORMAT = pyaudio.paInt16
        self.DTYPE='Int16'
        self.CHANNELS = 1

        self.RUN_SECONDS = 1000

        self.sampwidth = pyaudio.get_sample_size(self.FORMAT)

        self.mic = False
        self.wf = None

        self.deque_time = deque(maxlen=20)
        self.deque_mean = deque(maxlen=3)
        self.deque_freq = deque(maxlen=3)

        self.posneg_frequencies = np.fft.fftfreq(self.CHUNK, 1.0/self.RATE) 
        self.freqs_indices = np.where(self.posneg_frequencies >= 0)
        self.freqs = self.posneg_frequencies[np.where(self.posneg_frequencies >= 0)]

        self.t0 = time.time()
        self.ichunk = 0

        # plt.ion()
        # plt.axis([0,100,0,800])
        # fig=plt.figure()

        if RPI:
            self.led = Lights()
            self.led.start()
Example #11
 def record(self,
            participant='0',
            session='0',
            trial=0):  # TODO: eliminate this session requirement
     # record mark
     RECORDON = True
     self.RECORDON = RECORDON
     # open audio stream
     self.STREAM = self.pad.open(format=FORMAT,
                                 channels=CHANNELS,
                                 rate=RATE,
                                 input=True,
                                 frames_per_buffer=CHUNK)
     self.FRAMES = []
     while self.RECORDON:  # when being called, the 'finish' method will feed a False value to RECORDON to end loop
         self.SOUND = self.STREAM.read(CHUNK)
         self.FRAMES.append(self.SOUND)
     # close stream and terminate PyAudio object
     self.STREAM.stop_stream()
     self.STREAM.close()
     self.pad.terminate()
     # define file name
     WAVE_OUTPUT_FILENAME = str(participant) + '_' + str(
         session) + '_' + str(trial) + '.wav'
     # write sound to disk
     waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
     waveFile.setnchannels(CHANNELS)
     waveFile.setsampwidth(pyaudio.get_sample_size(FORMAT))
     waveFile.setframerate(RATE)
     waveFile.writeframes(b''.join(self.FRAMES))
     waveFile.close()
     return None
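The size of the WAV payload written by a recording like the one above follows directly from the stream parameters. A quick arithmetic sketch (the constants are typical values, not necessarily this project's):

    import pyaudio

    RATE, CHANNELS = 44100, 1
    width = pyaudio.get_sample_size(pyaudio.paInt16)  # 2 bytes per sample
    seconds = 3
    # bytes = frames/second * channels * bytes/sample * seconds
    payload = RATE * CHANNELS * width * seconds       # 264600 bytes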
Example #12
    def __init__(self, byte_data, sample_rate, bit_width, channels, dtype = None):
        """
        byte_data: A byte string containing the raw data.
        bit_width: bit width in bytes.
        """
        
        assert isinstance(bit_width, (int, long)) and bit_width > 0, \
                "`bit_width` must be positive integer."
        bit_width = pyaudio.get_sample_size(pyaudio.get_format_from_width(bit_width))
        
        assert isinstance(channels, int) and channels in (1, 2), \
                "`channels` can be either 1 (mono) or 2 (stereo)."
        
        assert sample_rate > 0, "`sample_rate` must be positive."

        self.__bit_width = bit_width
        self.__channels = channels
        self.__sample_rate = sample_rate
        self.__byte_data = byte_data # a byte string

        if dtype is None:
            dtype = self._get_dtype_by_bit_width()

        if not self._validate_dtype(dtype):
            raise ValueError("`dtype` is not compatible with the `bit_width`.")

        self.__dtype = dtype
        self.format = pyaudio.get_format_from_width(self.BIT_WIDTH)
Example #13
async def listen(websocket, _):
    print('Connected..')
    if settings.DEBUG:
        print('Debug activated')
    frame_data = []
    stt = speechtotext.SpeechToText()
    while True:
        chunk = await websocket.recv()
        if chunk == 'transcribe' and frame_data:
            audio_data = stt.process_audio(frame_data)

            if settings.AUDIO_STORAGE and frame_data:
                filename = f'{settings.AUDIO_FOLDER_PATH}/{uuid.uuid4()}.wav'
                with wave.open(filename, 'wb') as f:
                    f.setnchannels(settings.CHANNELS)
                    f.setsampwidth(pyaudio.get_sample_size(settings.FORMAT))
                    f.setframerate(settings.RATE)
                    f.writeframes(audio_data.frame_data)

            try:
                transcription = stt.recognize(audio_data)
                r = response(transcription)
            except speechtotext.RecognitionException as e:
                print('Error recognizing: {}'.format(str(e)))
                r = response(None, error=str(e))
            frame_data = []
            await websocket.send(r)
        else:
            frame_data.append(chunk)
Example #14
class Recorder:
    FORMAT = pyaudio.paInt16
    CHUNK = 1024
    SWIDTH = pyaudio.get_sample_size(pyaudio.paInt16)

    def __init__(self, input_name='Microphone', channel_id=1):
        self.__create_stream(input_name, channel_id)

    def __create_stream(self, input_name, channel_id):
        self.__stream = p.open(
            format=Recorder.FORMAT,
            channels=channel_id,
            rate=settings.sampling_rate,
            input=True,
            input_device_index=get_index_by_name(input_name),
            frames_per_buffer=Recorder.CHUNK)

    def record(self):
        nbits = self.__stream.get_read_available()
        try:
            raw_data = self.__stream.read(
                settings.recording_chunk_size,
                exception_on_overflow=False)  # TODO catch proper exception
            data = np.array(
                wave.struct.unpack("%dh" % (len(raw_data) / self.SWIDTH),
                                   raw_data))
        except OSError:
            print(('skipping audio', nbits))
            raw_data = self.__stream.read(settings.recording_chunk_size,
                                          exception_on_overflow=False)
            data = np.array(
                wave.struct.unpack("%dh" % (len(raw_data) / self.SWIDTH),
                                   raw_data))
        return data
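An equivalent and usually faster way to turn the raw paInt16 bytes into an array is np.frombuffer, which avoids the intermediate tuple that struct.unpack builds. A sketch under the same format assumption:

    import numpy as np
    import pyaudio

    SWIDTH = pyaudio.get_sample_size(pyaudio.paInt16)  # 2 bytes per sample

    def bytes_to_samples(raw_data):
        # The dtype must match the stream format: int16 for paInt16.
        assert len(raw_data) % SWIDTH == 0
        return np.frombuffer(raw_data, dtype=np.int16)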
Example #15
        def __init__(self,
                     device_index=None,
                     sample_rate=16000,
                     chunk_size=1024):
            assert device_index is None or isinstance(
                device_index, int), "Device index must be None or an integer"
            if device_index is not None:  # ensure device index is in range
                audio = pyaudio.PyAudio()
                count = audio.get_device_count()
                audio.terminate()  # obtain device count
                assert 0 <= device_index < count, "Device index out of range"
            assert isinstance(
                sample_rate, int
            ) and sample_rate > 0, "Sample rate must be a positive integer"
            assert isinstance(
                chunk_size, int
            ) and chunk_size > 0, "Chunk size must be a positive integer"
            self.device_index = device_index
            self.format = pyaudio.paInt16  # 16-bit int sampling
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(
                self.format)  # size of each sample
            self.SAMPLE_RATE = sample_rate  # sampling rate in Hertz
            self.CHANNELS = 1  # mono audio
            self.CHUNK = chunk_size  # number of frames stored in each buffer

            self.audio = None
            self.stream = None
Example #16
File: mic.py Project: GDuncan1/SmartDoor
    def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                                 MUSIC=False):
        """
            Records until a second of silence or times out after 12 seconds

            Returns a list of the matching options or None
        """

        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 12

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        self.speaker.play(jasperpath.data('audio', 'beep_hi.wav'))

        # prepare recording stream
        stream = self._audio.open(format=pyaudio.paInt16,
                                  channels=1,
                                  rate=RATE,
                                  input=True,
                                  frames_per_buffer=CHUNK)

        frames = []
        # increasing the range # results in longer pause after command
        # generation
        lastN = [THRESHOLD * 1.2 for i in range(30)]

        for i in range(0, int(RATE / CHUNK * LISTEN_TIME)):

            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)

            average = sum(lastN) / float(len(lastN))

            # TODO: 0.8 should not be a MAGIC NUMBER!
            if average < THRESHOLD * 0.8:
                break

        self.speaker.play(jasperpath.data('audio', 'beep_lo.wav'))

        # save the audio data
        stream.stop_stream()
        stream.close()

        with tempfile.SpooledTemporaryFile(mode='w+b') as f:
            wav_fp = wave.open(f, 'wb')
            wav_fp.setnchannels(1)
            wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wav_fp.setframerate(RATE)
            wav_fp.writeframes(b''.join(frames))
            wav_fp.close()
            f.seek(0)
            return self.active_stt_engine.transcribe(f)
Example #17
 def __init__(self, file_handler, to_text=True, stop_event=None):
     self.file_handler = file_handler
     self.sample_width = pyaudio.get_sample_size(
         settings.STREAM_AUDIO_FORMAT)
     self.to_text = to_text
     self.recognizer = sr.Recognizer()
     self.stop_event = stop_event
Example #18
    def __init__(self, wrapped_stream, format, muted=False):
        assert wrapped_stream is not None
        self.wrapped_stream = wrapped_stream
        self.muted = muted

        self.SAMPLE_WIDTH = pyaudio.get_sample_size(format)
        self.muted_buffer = b''.join([b'\x00' * self.SAMPLE_WIDTH])
Example #19
    def __init__(self, device_index, sample_rate, bit_width, chunk_size = 8092, channels = 1):

        audio = pyaudio.PyAudio()
        ## Checking the device_index is valid or not.
        assert isinstance(device_index, (int, long)), "Device index must be an integer."
        device_count = audio.get_device_count()
        assert 0 <= device_index < device_count, "`device_index` out of range: {} out of {}".format(device_index, device_count)
        audio.terminate()
        self.__device_index = device_index

        if not self.device_info["maxInputChannels"] > 0:
            raise DeviceTypeError("Can not source from a non-input device.")

        self.__format = pyaudio.get_format_from_width(bit_width)
        self.__bit_width = pyaudio.get_sample_size(self.FORMAT)

        assert isinstance(sample_rate, (int, long)), "`sample_rate` must be integer."
        
        max_sample_rate = self.device_info["defaultSampleRate"]
        assert 0 < sample_rate <= max_sample_rate, "`sample_rate` out of range: {} out of {}".format(sample_rate, max_sample_rate)
        self.__sample_rate = sample_rate

        assert isinstance(chunk_size, (int, long)), "`chunk_size` must be integer."
        self.__chunk_size = chunk_size

        assert channels in [1, 2], '`channels` can be either 1 or 2. 1 for mono audio, 2 for stereo.' 
        self.__channels = channels

        # audio resource and streams.
        self.__audio = None
        self.__input_stream = None
Example #20
    def __init__(self, path_prefix):
        assert len(self.COLOR_GRADIENT_WHEEL) == self.FRAME_HEIGHT, \
            "Need exactly {} colors in 'COLOR_GRADIENT_WHEEL'".format(self.FRAME_HEIGHT)
        # Convert hex string (for easy programmer modification) to bytearrays in 'COLOR_GRADIENT_WHEEL'
        for i, color_str in enumerate(self.COLOR_GRADIENT_WHEEL):
            self.COLOR_GRADIENT_WHEEL[i] = np.frombuffer(
                bytes.fromhex(color_str), dtype=np.uint8)

        self.template = np.zeros(
            (self.FRAME_HEIGHT, self.FRAME_WIDTH, self.NUM_COLOR_CHANNELS),
            dtype=np.uint8)
        self.pyaudio = PyAudio()
        audio_device_index = settings(
            path_prefix).get_selected_audio_device_index()
        self.audio_device_info = self.pyaudio.get_device_info_by_index(
            audio_device_index)
        if self.audio_device_info[
                'maxOutputChannels'] < self.NUM_AUDIO_CHANNELS:
            raise Exception("Audio output device should be at least stereo.")

        self.format = pyaudio.paInt16
        self.sample_size = pyaudio.get_sample_size(self.format)
        self.stream = None
        self.raw_audio_frames = b'\x00' * (self.NUM_AUDIO_CHANNELS *
                                           self.NUM_AUDIO_FRAMES_PER_BUFFER *
                                           self.sample_size)
Example #21
    def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
        """
            Records until a second of silence or times out after 12 seconds

            Returns a list of the matching options or None
        """

        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 12

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        self.speaker.play(jasperpath.data('audio', 'beep_hi.wav'))

        # prepare recording stream
        stream = self._audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)

        frames = []
        # increasing the range # results in longer pause after command generation
        lastN = [THRESHOLD * 1.2 for i in range(30)]

        for i in range(0, RATE / CHUNK * LISTEN_TIME):

            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)

            average = sum(lastN) / float(len(lastN))

            # TODO: 0.8 should not be a MAGIC NUMBER!
            if average < THRESHOLD * 0.8:
                break

        self.speaker.play(jasperpath.data('audio', 'beep_lo.wav'))

        # save the audio data
        stream.stop_stream()
        stream.close()

        with tempfile.SpooledTemporaryFile(mode='w+b') as f:
            wav_fp = wave.open(f, 'wb')
            wav_fp.setnchannels(1)
            wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wav_fp.setframerate(RATE)
            wav_fp.writeframes(''.join(frames))
            wav_fp.close()
            f.seek(0)
            mode = TranscriptionMode.MUSIC if MUSIC else TranscriptionMode.NORMAL
            transcribed = self.active_stt_engine.transcribe(f, mode=mode)
        return transcribed
Example #22
    def __init__(self, rt):
        super().__init__(rt)
        self.chunk_size = self.config['chunk_size']
        self.format = pyaudio.paInt16
        self.sample_width = pyaudio.get_sample_size(self.format)
        self.sample_rate = self.config['sample_rate']
        self.channels = self.config['channels']

        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.format,
                                  channels=self.channels,
                                  rate=self.sample_rate,
                                  input=True,
                                  frames_per_buffer=self.chunk_size)

        self.talking_volume_ratio = self.config['talking_volume_ratio']
        self.required_integral = self.config['required_noise_integral']
        self.max_di_dt = self.config['max_di_dt']
        self.noise_max_out_sec = self.config['noise_max_out_sec']
        self.recording_timeout = self.config['recording_timeout']
        self.energy_weight = 1.0 - pow(
            1.0 - self.config['ambient_adjust_speed'],
            self.chunk_size / self.sample_rate)

        # For convenience
        self.chunk_sec = self.chunk_size / self.sample_rate

        self.av_energy = None
        self.integral = 0
        self.noise_level = 0
        self._intercept = None
        self._has_activated = False
        self.engine = WakeWordService(
            rt, self.on_activation)  # type: WakeWordEnginePlugin
        self.engine.startup()
Example #23
def predict_file(dec,
                 pyaudio,
                 path,
                 frames,
                 args,
                 rate=16000,
                 format=pyaudio.paInt16,
                 save=False):
    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(pyaudio.get_sample_size(format))
    wf.setframerate(rate)
    #this code works for only for pulseaudio
    #wf.writeframes(b''.join(frames))
    wf.writeframes(frames)
    wf.close()

    results = dec.predict_file(path,
                               feat_mode=args.feat_mode,
                               feat_dim=args.feat_dim,
                               three_d=args.three_d)

    if not save:
        os.remove(path)
    if args.predict_mode == 0:
        task_outputs = dec.returnDiff(results)
    elif args.predict_mode == 1:
        task_outputs = dec.returnLabel(results)
    else:
        task_outputs = dec.returnClassDist(results)
    return task_outputs
Example #24
 def save(self):
     wf = wave.open(self._name, 'wb')
     wf.setnchannels(self._channels)
     wf.setsampwidth(pyaudio.get_sample_size(self._format))
     wf.setframerate(self._rate)
     wf.writeframes(b''.join(self._frames))
     wf.close()
Example #25
File: mic.py Project: paskausks/tewn
    def sample_size(self) -> int:
        """
        Get size of a single audio sample.

        :return: Size in bytes
        """
        return pyaudio.get_sample_size(self._sample_fmt)
Example #26
	def __init__(self, device_index = None):
		self.format = pyaudio.paInt16
		self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
		self.CHUNK = 1024

		self.audio = None
		self.stream = None
Example #27
	def __init__(self, args, CHUNK = 1024, FORMAT = pyaudio.paInt16, CHANNELS = 1, RATE = 16000, THRESHOLD = 2500, SILENCE_LIMIT = 2, PREV_AUDIO = 1):
		"""
		Initialization method for class AudioUtils.
		
		Defines the constants needed throughout the program.
		
		Keyword Arguments:
			CHUNK {number} -- CHUNKS of bytes to read each time from mic (default: {1024})
			FORMAT {[type]} -- [description] (default: {pyaudio.paInt16})
			CHANNELS {number} -- [description] (default: {1})
			RATE {number} -- [description] (default: {16000})
			THRESHOLD {number} -- The threshold intensity that defines silence and noise signal (an int. lower than THRESHOLD is silence) (default: {2500})
			SILENCE_LIMIT {number} -- Silence limit in seconds. The max amount of seconds where
				   only silence is recorded. When this time passes the
				   recording finishes and the file is delivered. (default: {2})
			PREV_AUDIO {number} -- Previous audio (in seconds) to prepend. When noise
				  is detected, how much of previously recorded audio is
				  prepended. This helps to prevent chopping the beginning
				  of the phrase. (default: {1})
		"""

		vc_logging.init_logger(level = args.log_level, verbose = args.verbose)
		self.log = logging.getLogger("vc_logger")

		self.WIDTH = pyaudio.get_sample_size(FORMAT)

		self.CHUNK = CHUNK
		self.FORMAT = FORMAT
		self.CHANNELS = CHANNELS
		self.RATE = RATE
		self.THRESHOLD = THRESHOLD
		self.SILENCE_LIMIT = SILENCE_LIMIT
		self.PREV_AUDIO = PREV_AUDIO
		self.audioQueue = Queue()
Example #28
def readMic(utteranceToneQ, utteranceSpeechQ, audioInputDevice):

    # setup
    DEVICE_IP_HW = audioInputDevice  # this usually is hw:2,0
    # DEVICE_IP_HW = audioInput
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    CHUNK = 4096
    BASELINE_SECONDS = 3
    CHECK_SILENCE_SECONDS = 1
    UTTERANCE_SECONDS = 5
    OUTPUT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              "test")

    try:
        with noalsaerr():
            p = pyaudio.PyAudio()  # start the PyAudio class

        # open stream with this device
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input_device_index=get_ip_device_index(
                            p, DEVICE_IP_HW),
                        input=True,
                        frames_per_buffer=CHUNK)

        # THRESHOLD = getThreshold(stream, RATE, CHUNK, BASELINE_SECONDS) +3000 # just to be safe
        THRESHOLD = 20000  # set for testing
        print("________________________________________")
        print("RECORDER -> Threshold : " + str(THRESHOLD))
        print("________________________________________")

        utteranceCount = 0
        while (True):
            utteranceData = getUtterance(stream, RATE, CHUNK, THRESHOLD,
                                         CHECK_SILENCE_SECONDS,
                                         UTTERANCE_SECONDS)
            # print("-----------------------------------------------")
            # set up the wav container to store the recorded 5 second utterances
            wavFile = wave.open(
                os.path.join(OUTPUT_DIR,
                             "mic_" + str(utteranceCount) + ".wav"), "w")
            wavFile.setnchannels(CHANNELS)
            wavFile.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wavFile.setframerate(RATE)
            wavFile.writeframes(utteranceData)
            wavFile.close()
            # print("saved " + os.path.join(OUTPUT_DIR, "mic_" + str(utteranceCOunt) + ".wav"))

            utteranceToneQ.put(utteranceData)
            utteranceSpeechQ.put(utteranceData)
            # print("AUDIO RECORDER -> Utterance " + str(utteranceCount) + " recorded")
            # print("-----------------------------------------------")

            utteranceCount += 1

    except:
        pass
Example #29
 def pyaudio_scb(self, rate, fmt=pyaudio.paInt16):
     samp_size = pyaudio.get_sample_size(fmt)
     maxint = (1 << (8*samp_size)) - 1
     dtype = ['!', 'h', 'i', '!', 'l', '!', '!', '!', 'q'][samp_size]
     def __callback(data, frames, time, status, self=self, rate=rate, maxint=maxint, dtype=dtype):
         return struct.pack(dtype*frames, *[maxint*int(i) for i in self.data(frames, self.freq, rate)])
     return __callback
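The packing idea above can be shown in isolation: one signed 16-bit struct code per sample, with the byte count matching the sample width. A standalone sketch (sample values are arbitrary):

    import struct
    import pyaudio

    width = pyaudio.get_sample_size(pyaudio.paInt16)  # 2 bytes
    samples = [0, 1000, -1000, 32767]
    # '<h' is a little-endian signed 16-bit integer; pack one per sample.
    data = struct.pack('<%dh' % len(samples), *samples)
    assert len(data) == width * len(samples)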
Example #30
def getAudio():
    #set properties of audio
    CHUNK = 2**11
    RATE = 44100
    CHANNELS = 2

    #open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    ##Listen to Microphone
    listening = False
    while (True):
        frames = []
        i = 0
        peak = 0
        while (True):
            data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)  # np.fromstring is deprecated
            peak = np.average(np.abs(data)) * 2

            #check if silent
            if (peak < 600):  ##do some tuning here boi
                ##Cut words and save words from microphone
                if (listening):

                    ##Save the audio to disk
                    filename = 'downloads/test' + str(random.randint(
                        0, 10000)) + '.wav'
                    wavefile = wave.open(filename, 'wb')
                    wavefile.setnchannels(1)
                    wavefile.setsampwidth(
                        pyaudio.get_sample_size(pyaudio.paInt16))
                    wavefile.setframerate(RATE)
                    wavefile.writeframes(b''.join(frames))
                    wavefile.close()

                    frames = []
                    listening = False
                    #print("SAVED")
                    ##Start a new thread
                    t = Thread(target=recognizeAudio, args=(filename, ))
                    t.start()

                #print("Not listening")
            else:
                listening = True
                #print("Listening")
                #data_audio=stream.read(CHUNK)
                frames.append(data)

    ##display words

    stream.stop_stream()
    stream.close()
    p.terminate()
Example #31
 def save_audio(self, fp, frames):
     sample_width = pyaudio.get_sample_size(self.format)
     f = open_audio(fp, 'wb')
     f.setsampwidth(sample_width)
     f.setframerate(self.rate)
     f.setnchannels(1)
     f.writeframes(''.join(frames))
     f.close()
Example #32
def write_wav(frames, file_name):

    waveFile1 = wave.open(file_name, 'wb')
    waveFile1.setnchannels(1)
    waveFile1.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
    waveFile1.setframerate(44100)
    waveFile1.writeframes(b''.join(frames))
    waveFile1.close()
Example #33
 def save_file(self):
     c=Configure()
     wf = wave.open(self.output_file_name, 'wb')
     wf.setnchannels(c.CHANNELS)
     wf.setsampwidth(pyaudio.get_sample_size(c.FORMAT))
     wf.setframerate(c.RATE)
     wf.writeframes(b''.join(self.wave_data_after))
     wf.close()
Example #34
 def save_audio(self, fp, frames):
     sample_width = pyaudio.get_sample_size(self.format)
     f = open_audio(fp, 'wb')
     f.setsampwidth(sample_width)
     f.setframerate(self.rate)
     f.setnchannels(1)
     f.writeframes(''.join(frames))
     f.close()
Example #35
    def __callback(self, in_data,  # recorded data if input=True; else None
                   frame_count,  # number of frames
                   time_info,  # dictionary
                   status_flags):  # PaCallbackFlags

        n = self.buffer.write_chunk(in_data, len(in_data))
        self.dropped_samples += frame_count - (n / (self.channels * pyaudio.get_sample_size(self.format)))
        self.rec_bytes += n
        return None, pyaudio.paContinue
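The dropped-sample bookkeeping above divides a byte count by channels * sample width, i.e. by the size of one frame. As a standalone illustration (names are hypothetical):

    import pyaudio

    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    BYTES_PER_FRAME = CHANNELS * pyaudio.get_sample_size(FORMAT)  # 2 * 2 = 4

    def frames_in(byte_count):
        # Convert a byte count reported by a buffer back into whole frames.
        return byte_count // BYTES_PER_FRAME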
Example #36
 def __init__(self):
     self.pa = pa = pyaudio.PyAudio()
     self.stream = pa.open(format=self.FORMAT,
                           channels=self.CHANNELS,
                           rate=self.RATE,
                           input=True,
                           frames_per_buffer=self.CHUNK)
     self.packer = Pcm2Wave(self.RATE, pyaudio.get_sample_size(self.FORMAT),
                            self.CHANNELS)
Example #37
 def __init__(self):
     self._sound = pyaudio.PyAudio()
     self.format = pyaudio.paInt16
     self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
     self.stream = self._sound.open(rate=self.SAMPLE_RATE,
                                    channels=1,
                                    format=self.format,
                                    input=True,
                                    frames_per_buffer=self.CHUNK)
Example #38
def write_chunks(path, frames, rate=44100, channels=2, format=pyaudio.paInt16):
    """ write audio to hard disk """
    
    wf = wave.open(path, "wb")
    wf.setnchannels(channels)
    wf.setsampwidth(pyaudio.get_sample_size(format))
    wf.setframerate(rate)
    wf.writeframes(b"".join(frames))
    wf.close()
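A hypothetical call to write_chunks, assuming the function and its imports are in scope; silent buffers keep the snippet self-contained:

    # Ten silent stereo paInt16 buffers of 1024 frames each:
    # 1024 frames * 2 channels * 2 bytes = 4096 bytes per buffer.
    frames = [b'\x00' * 4096] * 10
    write_chunks('out.wav', frames)  # ~0.23 s of silence at the 44100 Hz default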
Example #39
def ActiveListening(this):
    threshold = None
    textout.SystemPrint("Started to listen actively")
    #RECORD A WAV FILE, CUTOFF AT 12s OR FALLS BELOW THRESHOLD
    #SEND TO WITAI
    #RECEIVE INPUT
    #SEND TO PROCESSOR AND RETURN INTENT
    #FIND ACTION.

    RATE = 16000
    CHUNK = 1024

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    frames = []

    lastN = [
        144 * 1.2 for i in range(15)
    ]  #changing array length will determine if average will change faster or not

    for i in range(0, int(RATE / CHUNK *
                          5)):  #RATE(16000) / CHUNK(1024) * TIME (5s)

        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)

        lastN.pop(0)
        lastN.append(score)
        average = sum(lastN) / float(len(lastN))
        print(str(average))

        if average < 144 - 20:
            textout.SystemPrint("Listening stopped, below threshold.")
            break

    textout.SystemPrint("Listening Timeout!")
    stream.stop_stream()
    stream.close()
    p.terminate()

    with tempfile.NamedTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(16000)
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)
        transcriber.TranscribeAudiofile(this, f)

    textout.SystemPrint("Stopped listening actively")
Example #40
        def __init__(self, device_index = None):
            self.device_index = device_index
            self.format = pyaudio.paInt16 # 16-bit int sampling
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
            self.RATE = 16000 # sampling rate in Hertz
            self.CHANNELS = 1 # mono audio
            self.CHUNK = 1024 # number of frames stored in each buffer

            self.audio = None
            self.stream = None
Example #41
File: audio_fun.py Project: adgaudio/Bin
def write_wave(arrs, filepath,
               sample_width=pyaudio.get_sample_size(PYAUDIO_FORMAT),
               channels=CHANNELS, sample_rate=SAMPLE_RATE):
    f = wave.open(filepath, 'w')
    f.setnchannels(channels)
    f.setsampwidth(sample_width)
    f.setframerate(sample_rate)
    for arr in _structpack(arrs):
        f.writeframes(arr)
    f.close()
Example #42
        def __init__(self, device_index=None):
            self.device_index = device_index
            self.format = pyaudio.paInt16  # 16-bit int sampling
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
            self.RATE = 16000  # sampling rate in Hertz
            self.CHANNELS = 1  # mono audio
            self.CHUNK = 1024  # number of frames stored in each buffer

            self.audio = None
            self.stream = None
Example #43
        def __init__(self, device_index = None):
            self.device_index = device_index
            self.format = pyaudio.paInt16
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
            self.RATE = 44100
            self.CHANNELS = 1
            self.CHUNK = 2205

            self.audio = None
            self.stream = None
Example #44
    def passiveListen(self, persona):
        rate = self.PASSIVE_RATE
        chunk = self.PASSIVE_CHUNK
        LISTEN_TIME = 10

        THRESHOLD = self.THRESHOLD_MULTIPLIER * self.fetchThreshold()
        stream = self._audio.open(format=pyaudio.paInt16,
                                  channels=1,
                                  rate=rate,
                                  input=True,
                                  frames_per_buffer=chunk)
        frames = []
        didDetect = False

        for i in range(0, rate / chunk * LISTEN_TIME):

            data = stream.read(chunk)
            frames.append(data)
            score = self.getScore(data)

            if score > THRESHOLD:
                didDetect = True
                break

        # no use continuing if no flag raised
        if not didDetect:
            print "No disturbance detected"
            stream.stop_stream()
            stream.close()
            return (None, None)

        # cutoff any recording before this disturbance was detected
        frames = frames[-20:]

        # otherwise, let's keep recording for few seconds and save the file
        DELAY_MULTIPLIER = 1.5
        for i in range(0, rate / chunk * DELAY_MULTIPLIER):
            data = stream.read(chunk)
            frames.append(data)

        with tempfile.NamedTemporaryFile(mode='w+b') as f:
            wav_fp = wave.open(f, 'wb')
            wav_fp.setnchannels(1)
            wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wav_fp.setframerate(rate)
            wav_fp.writeframes(''.join(frames))
            wav_fp.close()
            f.seek(0)
            # check if PERSONA was said
            transcribed = self.passive_transcribe(f)

        if any(persona in phrase for phrase in transcribed):
            return (THRESHOLD, persona)

        return (False, transcribed)
Example #45
  def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                               MUSIC=False):
    """
        Records until a second of silence or times out after 12 seconds
        Returns a list of the matching options or None
    """

    # check if no threshold provided
    if THRESHOLD is None:
      THRESHOLD = self.fetchThreshold()

    playing = 'playing' in check_output(['mpc', '-h', '[email protected]']).split('\n')[1]
    if playing:
      call(['mpc', '-h', '[email protected]', 'pause'])

    # self.speaker.play(jasperpath.data('audio', 'beep_hi.wav'))
    self.say(random.choice(BEFORE))

    frames = []
    # increasing the range # results in longer pause after command
    # generation
    lastN = [THRESHOLD * 1.2 for i in range(30)]

    for i in range(0, RATE / CHUNK * LISTEN_TIME):

      data = self.queue.get()
      frames.append(data)
      score = self.getScore(data)

      lastN.pop(0)
      lastN.append(score)

      average = sum(lastN) / float(len(lastN))

      # TODO: 0.8 should not be a MAGIC NUMBER!
      if average < THRESHOLD * 0.8:
        break

    # self.speaker.play(jasperpath.data('audio', 'beep_lo.wav'))
    self.say(random.choice(AFTER))

    if playing:
      call(['mpc', '-h', '[email protected]', 'play'])

    # save the audio data
    with tempfile.SpooledTemporaryFile(mode='w+b') as f:
      wav_fp = wave.open(f, 'wb')
      wav_fp.setnchannels(1)
      wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
      wav_fp.setframerate(RATE)
      wav_fp.writeframes(''.join(frames))
      wav_fp.close()
      f.seek(0)
      return self.active_stt_engine.transcribe(f)
Example #46
        def __init__(self, device_index = None):
            assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
            self.device_index = device_index
            self.format = pyaudio.paInt16 # 16-bit int sampling
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
            self.RATE = 16000 # sampling rate in Hertz
            self.CHANNELS = 1 # mono audio
            self.CHUNK = 1024 # number of frames stored in each buffer

            self.audio = None
            self.stream = None
Example #47
    def __init__(self, hmm_type=1, vad_threshold=3.5, pl_window=10, wip=1e-4, 
                  silprob=0.3, bestpath=True, remove_dc=True, do_keyphrase=False, keyphrase="NAVSA", kws_threshold=1e-4):

        self.CHUNK = 1024
        self.RATE = 16000
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1

        self.RUN_SECONDS = 1000

        self.sampwidth = pyaudio.get_sample_size(self.FORMAT)

        self.do_trigger = False

        # Create a decoder with certain model
        self.config = Decoder.default_config()

        if hmm_type == 0:
            self.config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
        elif hmm_type == 1:
            self.config.set_string('-hmm', 'model/cmusphinx-en-us-5.2')

        self.config.set_string('-dict', 'model/7705.dic')

        if do_keyphrase:
            self.config.set_string('-keyphrase', keyphrase)
            self.config.set_float('-kws_threshold', kws_threshold)
        else:
            self.config.set_string('-lm', 'model/7705.lm')


        self.config.set_string('-logfn', '/dev/null')
        self.config.set_string('-debug', '1')

        # http://cmusphinx.sourceforge.net/wiki/pocketsphinxhandhelds
        self.config.set_boolean('-bestpath', bestpath) # default is true
        self.config.set_float('-vad_threshold', vad_threshold) # default is 2
        self.config.set_float("-pl_window", pl_window) # default is 5, range is 0 to 10
        self.config.set_float('-wip', wip) # 0.65            word insertion penalty
        self.config.set_float('-silprob', silprob) # 0.005           silence word transition probability
        self.config.set_string('-remove_dc', 'yes' if remove_dc else 'no')

        self.decoder = Decoder(self.config)

        self.deque_time = deque(maxlen=20)
        self.deque_mean = deque(maxlen=50)

        self.mic = False
        self.wf = None
        self.vad = False
        self.rec_trigger = False
        self.rec_frames = []
        self.sec_since_kw = 999.9
        self.sec_since_vad = 999.9
Example #48
 def read(self, buf, source_channels):
     source_sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * source_channels
     audio = buf[3:]
     try:
         # sometimes the data received is incomplete so reusing state
         # data from ratecv() sometimes results in errors
         (audio, _) = audioop.ratecv(audio, source_sample_width, source_channels, 48000, self.listener.sample_rate, None)
         audio = audioop.tomono(audio, self.listener.sample_width, 0.5, 0.5)
         self.listener.read(audio)
     except audioop.error, e:
         logger.warn("Error preparing sample", exc_info=True)
Example #49
File: demo.py Project: ananko/speech_box
  def __init__(self):
    self.CHUNK = 1024
    self.FORMAT = pyaudio.paInt16
    self.CHANNELS = 2
    self.RATE = 44100
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.FORMAT)

    self.energy_threshold = 1500
    self.pause_threshold = 0.8
    self.quiet_duration = 0.5

    self.seconds_per_buffer = self.CHUNK / self.RATE
    self.pause_buffer_count = math.ceil(self.pause_threshold / self.seconds_per_buffer)
    self.quiet_buffer_count = math.ceil(self.quiet_duration / self.seconds_per_buffer)
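Working the buffer arithmetic through with the constants above (this assumes Python 3 true division, which self.CHUNK / self.RATE relies on):

    import math

    CHUNK, RATE = 1024, 44100
    seconds_per_buffer = CHUNK / RATE                         # ~0.02322 s
    pause_buffer_count = math.ceil(0.8 / seconds_per_buffer)  # 35 buffers
    quiet_buffer_count = math.ceil(0.5 / seconds_per_buffer)  # 22 buffers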
Example #50
        def __init__(self, device_index = None):
            assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
            if device_index is not None: # ensure device index is in range
                audio = pyaudio.PyAudio(); count = audio.get_device_count(); audio.terminate() # obtain device count
                assert 0 <= device_index < count, "Device index out of range"
            self.device_index = device_index
            self.format = pyaudio.paInt16 # 16-bit int sampling
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
            self.RATE = 16000 # sampling rate in Hertz
            self.CHANNELS = 1 # mono audio
            self.CHUNK = 1024 # number of frames stored in each buffer

            self.audio = None
            self.stream = None
Example #51
        def __init__(self, device_index = None, sample_rate = 16000, chunk_size = 1024):
            assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
            if device_index is not None: # ensure device index is in range
                audio = pyaudio.PyAudio(); count = audio.get_device_count(); audio.terminate() # obtain device count
                assert 0 <= device_index < count, "Device index out of range"
            assert isinstance(sample_rate, int) and sample_rate > 0, "Sample rate must be a positive integer"
            assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
            self.device_index = device_index
            self.format = pyaudio.paInt16 # 16-bit int sampling
            self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format) # size of each sample
            self.SAMPLE_RATE = sample_rate # sampling rate in Hertz
            self.CHUNK = chunk_size # number of frames stored in each buffer

            self.audio = None
            self.stream = None
Example #52
    def _start_wave_file(self):
        ''' Open a new wave file for writing in the specified output directory.
        Files are named using the UTC timestamp and a c_ prefix
        :return:
        '''
        # figure out the new filename
        now = datetime.utcnow()
        file_name = "c_{0}.wav".format(now.strftime("%Y%m%dT%H%M%SZ"))
        file_path = os.path.join(self.output_directory, file_name)

        self._current_wave_file = wave.open(file_path, mode='wb')
        self._current_wave_file.setnchannels(self._num_channels)
        self._current_wave_file.setsampwidth(pyaudio.get_sample_size(self._sample_format))
        self._current_wave_file.setframerate(self._sample_rate)

        self.log.info("Starting continuous recording: " + file_name)
Example #53
 def __init__(self, length, n_channels=1):
     """
     :param length: length of the buffer in samples
     :param n_channels: Number of channels present in the audio samples.
     """
     self._n_channels = n_channels
     self._length = length * self._n_channels  # Length in samples
     self._sample_size = pyaudio.get_sample_size(pyaudio.paFloat32)
     self._sample_format = self._format[pyaudio.paFloat32]
     # Intialize state variables
     self._size = 0
     self._write_start = 0
     self._read_start = 0
     # Instantiate buffer
     self._buffer = np.zeros(self._length, dtype=np.float32)
     print self._buffer.shape
     # Setup blocking interface events
     self._setup_events()
Example #54
 def store_data(self, indata):
     print('store_data(): len(frames): ' + str(len(self.frames)))
     self.frames.append(indata)
     if len(self.frames) >= self.chunks_per_file:
         # TODO: Move file I/O to an async thread
         wave_file = wave.open(self.directory +
                               str(self.file_num),
                               'wb')
         wave_file.setnchannels(self.channels)
         wave_file.setsampwidth(pyaudio.get_sample_size(self.audio_format))
         wave_file.setframerate(self.rate)
         wave_file.writeframes(b''.join(self.frames[:self.chunks_per_file]))
         wave_file.close()
         self.file_num += 1
         # move frame
         # TODO: Use a circular buffer or other appropriate data structure,
         # to avoid doing this move in the callback thread.
         temp_frame_buffer = self.frames[self.chunks_per_file:]
         self.frames = temp_frame_buffer
Example #55
 def __init__(self, fmt=pa.paInt16, rate=44100):
     self.fmt = fmt
     self.rate = rate
     self.amp = 1 if fmt == pa.paFloat32 else 128**pa.get_sample_size(
         fmt) / 2 - 1
     if fmt == pa.paFloat32:
         self.dtype = np.float32  # pylint: disable=E1101
     elif fmt == pa.paInt32:
         self.dtype = np.int32  # pylint: disable=E1101
     elif fmt == pa.paInt24:
         raise SampleFormatNotSuportedException('paInt24')
     elif fmt == pa.paInt16:
         self.dtype = np.int16
     elif fmt == pa.paInt8:
         self.dtype = np.int8
     elif fmt == pa.paUInt8:
         self.dtype = np.uint8
     else:
         raise SampleFormatNotSuportedException('paCustomFormat')
     self.reset()
     self.data = b''
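The same format-to-dtype mapping can be expressed as a lookup table; a sketch (paInt24 is omitted because NumPy has no native 24-bit integer type):

    import numpy as np
    import pyaudio as pa

    PA_TO_DTYPE = {
        pa.paFloat32: np.float32,
        pa.paInt32: np.int32,
        pa.paInt16: np.int16,
        pa.paInt8: np.int8,
        pa.paUInt8: np.uint8,
    }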
Example #56
    def trigger_recording(self, filename=None):
        '''
        :param filename: If specified, this filename will be used to save the triggered recording.  If not, a
        filename will be generated by appending the UTC timestamp to t_.
        :return:
        '''

        # figure out the new filename, if not specified
        if filename is None:
            now = datetime.utcnow()
            filename = "t_{0}.wav".format(now.strftime("%Y%m%dT%H%M%SZ"))
        file_path = os.path.join(self.output_directory, filename)

        triggered_wave_file = wave.open(file_path, mode='wb')
        triggered_wave_file.setnchannels(self._num_channels)
        triggered_wave_file.setsampwidth(pyaudio.get_sample_size(self._sample_format))
        triggered_wave_file.setframerate(self._sample_rate)

        triggered_wave_file.writeframes(''.join(self._ring_buffer))
        triggered_wave_file.close()

        self.log.info("Wrote triggered file: " + filename)
Example #57
    def __init__(self):
        """
        Initializes the pocketsphinx instance.

        Arguments:
        active_stt_engine -- performs STT while Jasper is in active listen mode
        """
        self._logger = logging.getLogger(__name__)
        self._logger.info("Initializing PyAudio. ALSA/Jack error messages " +
                          "that pop up during this process are normal and " +
                          "can usually be safely ignored.")
        self._audio = pyaudio.PyAudio()
        self._logger.info("Initialization of PyAudio completed.")

        self.format = pyaudio.paInt16
        self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)

        self.energy_threshold = 300 # minimum audio energy to consider for recording
        self.dynamic_energy_threshold = True
        self.dynamic_energy_adjustment_damping = 0.15
        self.dynamic_energy_ratio = 1.5
        self.pause_threshold = 0.8 # seconds of non-speaking audio before a phrase is considered complete
        self.phrase_threshold = 0.3 # minimum seconds of speaking audio before we consider the speaking audio a phrase - values below this are ignored (for filtering out clicks and pops)
        self.non_speaking_duration = 0.5 # seconds of non-speaking audio to keep on both sides of the recording
Example #58
from __future__ import print_function, division
import pyaudio, os, sys, time, inspect

FILE_PATH = os.path.dirname(inspect.getfile(inspect.currentframe()))

##########################################################################
# SAMPLING PARAMETERS
FRAME_RATE = 44100 # The sound card sampling rate in Hz
DOWNSAMPLED_RATE = 16000 # Hz (MIT REDD uses 15kHz but 16kHz is a standard
#                              rate and so increases compatibility)
RECORD_SECONDS = 1 # Seconds to record per queue item
N_CHANNELS = 2 # one for voltage, one for current 
FRAMES_PER_BUFFER = 1024
SAMPLE_FORMAT = pyaudio.paInt16
SAMPLE_WIDTH = pyaudio.get_sample_size(SAMPLE_FORMAT)
N_READS_PER_QUEUE_ITEM = int(round(FRAME_RATE / FRAMES_PER_BUFFER
                                   * RECORD_SECONDS))

##########################################################################
# MAINS PARAMETERS
MAINS_HZ = 50
SAMPLES_PER_MAINS_CYCLE = FRAME_RATE / MAINS_HZ
PHASE_DIFF_TOLERANCE = SAMPLES_PER_MAINS_CYCLE / 4
SAMPLES_PER_DEGREE = SAMPLES_PER_MAINS_CYCLE / 360

##########################################################################
# PORT TO BROADCAST MEASURES ON
BROADCAST_PORT = 5556

##########################################################################
Example #59
import threading
import math
import numpy as np
import pyaudio
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pa_tools.audiohelper import AudioHelper
from pa_tools.audiobuffer import AudioBuffer
from pa_tools.stftmanager import StftManager
from pa_tools.audiolocalizer import AudioLocalizer
from pa_tools.distributionlocalizer import DistributionLocalizer



# Setup constants
SAMPLE_TYPE = pyaudio.paFloat32
DATA_TYPE = np.float32
SAMPLE_SIZE = pyaudio.get_sample_size(SAMPLE_TYPE)
SAMPLE_RATE = 44100
FRAMES_PER_BUF = 4096  # For 44100 Fs, be careful going over 4096, loud sounds may occur...
FFT_LENGTH = FRAMES_PER_BUF
WINDOW_LENGTH = FFT_LENGTH
HOP_LENGTH = WINDOW_LENGTH / 2
NUM_CHANNELS_IN = 7
NUM_CHANNELS_OUT = 2
N_THETA = 20
N_PHI = N_THETA / 2
PLOT_CARTES = False
PLOT_POLAR = True
EXTERNAL_PLOT = False
PLAY_AUDIO = True
TIMEOUT = 1
# Setup mics
Example #60
    def passiveListen(self, PERSONA):
        """
        Listens for PERSONA in everyday sound. Times out after LISTEN_TIME, so
        needs to be restarted.
        """

        THRESHOLD_MULTIPLIER = 1.8
        RATE = 16000
        CHUNK = 1024

        # number of seconds to allow to establish threshold
        THRESHOLD_TIME = 1

        # number of seconds to listen before forcing restart
        LISTEN_TIME = 10

        # prepare recording stream
        stream = self._audio.open(format=pyaudio.paInt16,
                                  channels=1,
                                  rate=RATE,
                                  input=True,
                                  frames_per_buffer=CHUNK)

        # stores the audio data
        frames = []

        # stores the lastN score values
        lastN = [i for i in range(30)]

        # calculate the long run average, and thereby the proper threshold
        for i in range(0, RATE / CHUNK * THRESHOLD_TIME):

            data = stream.read(CHUNK)
            frames.append(data)

            # save this data point as a score
            lastN.pop(0)
            lastN.append(self.getScore(data))
            average = sum(lastN) / len(lastN)

        # this will be the benchmark to cause a disturbance over!
        THRESHOLD = average * THRESHOLD_MULTIPLIER

        # save some memory for sound data
        frames = []

        # flag raised when sound disturbance detected
        didDetect = False

        # start passively listening for disturbance above threshold
        for i in range(0, RATE / CHUNK * LISTEN_TIME):

            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            if score > THRESHOLD:
                didDetect = True
                break

        # no use continuing if no flag raised
        if not didDetect:
            print "No disturbance detected"
            stream.stop_stream()
            stream.close()
            return (None, None)

        # cutoff any recording before this disturbance was detected
        frames = frames[-20:]

        # otherwise, let's keep recording for few seconds and save the file
        DELAY_MULTIPLIER = 1
        for i in range(0, RATE / CHUNK * DELAY_MULTIPLIER):

            data = stream.read(CHUNK)
            frames.append(data)

        # save the audio data
        stream.stop_stream()
        stream.close()

        with tempfile.NamedTemporaryFile(mode='w+b') as f:
            wav_fp = wave.open(f, 'wb')
            wav_fp.setnchannels(1)
            wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wav_fp.setframerate(RATE)
            wav_fp.writeframes(''.join(frames))
            wav_fp.close()
            f.seek(0)
            # check if PERSONA was said
            json = self.passive_stt_engine.transcribe(f)
        if json['_text'] and json['outcomes'][0]:
            transcribed = []
            transcribed.append(json['outcomes'][0]['intent'].upper())
            if any(PERSONA in phrase for phrase in transcribed):
                return (THRESHOLD, PERSONA)
            else:
                return (False, transcribed)
        return (False, '')