def raw_audio2wav(raw_audio: bytes, pyaudio_config: dict) -> bytes or None:
    '''
    Wrap raw audio frames in a WAV container and return the encoded bytes.

    Args:
        raw_audio: raw audio frames to store as the WAV data
        pyaudio_config: pyaudio configuration dictionary; only these keys
            are read by this function:
                'format': pyaudio sample format (pyaudio.<format>)
                'channels': int, number of channels
                'rate': int, frame rate
    Returns:
        WAV formatted bytes : if the conversion succeeded
        None : if anything failed (the exception is passed to ErrorLogger)
    '''
    try:
        # The module-level helper gives the sample width without creating a
        # PyAudio() instance that would then have to be terminated.
        samp_size = pyaudio.get_sample_size(pyaudio_config['format'])
        f = io.BytesIO()
        # Leaving the with-block closes the writer, which finalizes the WAV
        # header; the BytesIO itself stays open because we passed it in.
        with wave.open(f, 'wb') as wave_writer:
            wave_writer.setnchannels(pyaudio_config['channels'])
            wave_writer.setsampwidth(samp_size)
            wave_writer.setframerate(pyaudio_config['rate'])
            wave_writer.writeframes(raw_audio)

        return f.getvalue()
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
예제 #2
0
    def _parse_intent(self, intent: Dict) -> Dict:
        '''
        Parse data recieved from api.ai bot
        Args:
            intent: data from api.ai bot
        Returns:
            dictionary = {
                'text': text, 'action':{'name': name, 'parameters':parameters
                              }
            }
        '''

        try:
            answer = dict()
            #extract bot answer
            answer['text'] = intent['result']['fulfillment']['speech']
            answer['intent_name'] = intent['result']['metadata']['intentName']
            answer['action'] = dict()
            answer['action']['name'] = str()
            answer['action']['parameters'] = str()
            #if command is complete (all required fields are filled), extract the command information
            if 'actionIncomplete' in intent['result'] and intent['result'][
                    'actionIncomplete'] is False:
                answer['action']['name'] = intent['result']['action']
                answer['action']['parameters'] = intent['result']['parameters']

            return answer
        except Exception as e:
            ErrorLogger(__file__, e)
            return None
예제 #3
0
    def request(self, msg: str) -> Dict or None:
        '''
        Send a text message to the api.ai bot and return the parsed answer.

        Args:
            msg: message to send to the api.ai bot; anything that is not a
                 str is replaced by '.'
        Returns:
            the result of self._parse_intent on the decoded response: if data
            was received
            None: if failed
        '''

        try:
            query = self._ai.text_request()
            query.lang = 'ru'
            query.session_id = self.session_id
            if isinstance(msg, str):
                query.query = msg
            else:
                query.query = '.'

            # The response body is UTF-8 encoded JSON.
            raw_body = query.getresponse().read()
            payload = json.loads(raw_body.decode('utf-8'))

            return self._parse_intent(payload)
        except Exception as err:
            ErrorLogger(__file__, err)
            return None
예제 #4
0
    def __init__(self, pyaudio_config: dict, min_rms=100):
        '''
        Constructor: stores the audio settings and starts an input stream.

        Args:
            pyaudio_config: pyaudio configuration in format
                dictionary = {
                    'format': audio_format,
                    'channels': channels_number,
                    'rate': rate,
                    'frames_per_buffer': frames_per_buffer,
                    'input_device': input_device_number
                }
                The input device number may also be given under the key
                'device'; 'input_device' wins when both are present.

            min_rms: minimal sound rms value

        Any exception raised during set-up is passed to ErrorLogger and not
        re-raised, so attributes assigned after the failing step are not set.
        '''

        try:
            self.set_min_rms(min_rms=min_rms)
            #set up pyaudio configuration and start audio stream
            self.format = pyaudio_config['format']
            self.channels = pyaudio_config['channels']
            self.rate = pyaudio_config['rate']
            self.chunk_size = pyaudio_config['frames_per_buffer']
            # The configuration format was documented with the key 'device'
            # while the code only read 'input_device'; accept either so a
            # config following the documented format no longer fails.
            if 'input_device' in pyaudio_config:
                self.input_device = pyaudio_config['input_device']
            else:
                self.input_device = pyaudio_config['device']

            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(
                format=self.format,
                channels=self.channels,
                rate=self.rate,
                input=True,
                input_device_index=self.input_device,
                frames_per_buffer=self.chunk_size)
            self.stream.start_stream()
        except Exception as e:
            ErrorLogger(__file__, e)
예제 #5
0
 def listen_audio(self, timeout=1000000) -> bytes or None:
     '''
     Wait for sound on the input stream and record it until silence.

     Args:
         timeout: maximum time in seconds to wait for a loud enough chunk
     Returns:
         the result of self.record_audio : once a chunk's rms reaches
                                           self.min_rms
         None : if the timeout expires first or an error occurs
     '''
     try:
         began = time.time()
         while (time.time() - began) < timeout:
             data = self.stream.read(self.chunk_size, exception_on_overflow=False)
             # rms is computed with a sample width of 2 bytes; the first
             # loud chunk is handed over so it becomes part of the recording
             if audioop.rms(data, 2) >= self.min_rms:
                 return self.record_audio(last_chunk=data)
     except Exception as err:
         ErrorLogger(__file__, err)
     return None
예제 #6
0
def read_pyaudio_config() -> dict or None:
	'''
	Fetch the 'pyaudio' parameter from the ROS Parameter Server.

	Returns:
		dict : the parameter itself when it is already a dictionary, or the
		       result of json.loads when it is a string
		None : if the parameter is absent, has another type, or the string
		       cannot be decoded as JSON
	'''

	if not rospy.has_param('pyaudio'):
		return None

	raw_config = rospy.get_param('pyaudio')
	if isinstance(raw_config, dict):
		return raw_config
	if not isinstance(raw_config, str):
		return None

	# A string value is expected to hold the configuration as JSON text.
	try:
		return json.loads(raw_config)
	except Exception as err:
		ErrorLogger(__file__, err)
		return None
예제 #7
0
    def text_to_speech(self, text: str) -> bytes or None:
        '''
        Translate text to speech using the Yandex TTS HTTP service.

        Args:
            text: text to translate; it is URL-quoted before being sent
        Returns:
            wave format audio bytes : if translated
            None : if failed (the exception is passed to ErrorLogger)
        '''

        try:
            url = 'https://tts.voicetech.yandex.net/generate?text='
            url += parse.quote(text)
            url += '&format=wav&lang=ru&speaker=ermil&key=' + self.yandex_voice_key

            # The with-block closes the HTTP response (and its connection)
            # even when reading the body fails.
            with request.urlopen(url) as response:
                print('yandex!')
                return response.read()
        except Exception as e:
            ErrorLogger(__file__, e)
            return None
예제 #8
0
    def play_audio(self,
                   audio_data_src: http.client.HTTPResponse or io.BytesIO,
                   format: str = 'wav') -> None:
        '''
        Plays audio read from audio_data_src.

        Args:
            audio_data_src: binary audio source (file-like object)
            format: audio format; only 'wav' is handled, for any other value
                    nothing is played
        Any exception is passed to ErrorLogger and not re-raised. The output
        stream and the PyAudio instance are released even when playback fails.
        '''
        try:
            if format != 'wav':
                return

            #number of frames read and written per iteration
            chunk = 1024

            #closing the reader does not close audio_data_src itself
            with wave.Wave_read(audio_data_src) as wav:
                p = pyaudio.PyAudio()
                try:
                    #open an output stream matching the wav parameters
                    stream = p.open(
                        format=p.get_format_from_width(wav.getsampwidth()),
                        channels=wav.getnchannels(),
                        rate=wav.getframerate(),
                        output=True)
                    try:
                        #play until the reader returns no more frames
                        data = wav.readframes(chunk)
                        while data:
                            stream.write(data)
                            data = wav.readframes(chunk)
                        stream.stop_stream()
                    finally:
                        stream.close()
                finally:
                    #always release PortAudio, also after a failure
                    p.terminate()
        except Exception as e:
            ErrorLogger(__file__, e)
예제 #9
0
 def record_audio(self, last_chunk):
     '''Records speech until silence

     Chunks are read from self.stream; a chunk is kept when its rms is at
     least self.min_rms / 3, and a chunk whose rms reaches self.min_rms
     restarts the silence timer. Recording stops once 1.2 seconds pass
     without such a chunk.

     Args:
         last_chunk: last chunk before record starting; it becomes the
                     beginning of the recording
     Returns:
         buf: contains recorded audio data, bytes type
         None: if an error occurred (the exception is passed to ErrorLogger)
     '''
     print('rec')
     try:
         # Collect the pieces and join once instead of growing a bytes
         # object chunk by chunk.
         parts = [last_chunk]
         silence_start = time.time()
         while (time.time() - silence_start) < 1.2:
             chunk = self.stream.read(self.chunk_size, exception_on_overflow=False)
             # rms is computed with a sample width of 2 bytes, once per chunk
             level = audioop.rms(chunk, 2)
             if level >= self.min_rms / 3:
                 parts.append(chunk)
             if level >= self.min_rms:
                 silence_start = time.time()
         return b''.join(parts)
     except Exception as e:
         ErrorLogger(__file__, e)
         return None
예제 #10
0
    def recognize_speech(self, wav_data: bytes) -> str or None:
        '''
        Send wave audio to the Yandex ASR service and pick the best variant.

        Args:
            wav_data: wave audio data with speech
        Returns:
            text of the 'variant' element with the highest confidence above
            zero (the first one wins on ties) : if data received
            None: if there is no such variant or the request failed
        '''

        try:
            endpoint = ('https://asr.yandex.net/asr_xml?uuid=' + self._make_uuid()
                        + '&key=' + self.yandex_voice_key + '&topic=queries')
            request_headers = {
                'Host': 'asr.yandex.net',
                'Content-Type': 'audio/x-wav'
            }
            reply = requests.post(endpoint, wav_data, headers=request_headers)

            print(reply.text)
            best_text = None
            best_conf = 0
            # Only direct children named 'variant' carry recognition results.
            for variant in ET.fromstring(reply.text).findall('variant'):
                score = float(variant.attrib['confidence'])
                if score > best_conf:
                    best_conf, best_text = score, variant.text

            return best_text
        except Exception as err:
            ErrorLogger(__file__, err)
            return None