Example #1
import time

import numpy as np
import rospy


def main():

    rospy.init_node( 'beep_publisher' )

    # parameters
    loop_rate = rospy.get_param( '~loop_rate', 100 )
    buffer_size = rospy.get_param( '~buffer_size', 4096 )
    use_audio_common = rospy.get_param( '~use_audio_common', False )
    topicname_data = '~audio'
    topicname_info = '~info'

    # import message modules
    if use_audio_common:
        from audio_common_msgs.msg import AudioData
        from audio_stream_msgs.msg import AudioInfo
    else:
        from audio_stream_msgs.msg import AudioData, AudioInfo

    # publishers
    publisher_data = rospy.Publisher( topicname_data, AudioData, queue_size=1000 )
    publisher_info = rospy.Publisher( topicname_info, AudioInfo, queue_size=10 )

    # message
    msg_data = AudioData()
    msg_info = AudioInfo()

    # gen a default buffer
    msg_info.channels = 1
    msg_info.sampling_rate = 16000
    msg_info.width = 2

    frequency_f0 = 440 # fundamental frequency [Hz]
    frequency_beep = 1.0 # beep period [s]

    # generateBeep is a helper defined elsewhere in this package
    # (see the sketch after this example)
    buffer_beep = generateBeep( 0.1 * ((2**8) ** 2),
                                [ 440, 480, 500, 520, 400, 300, 600, 800 ],
                                frequency_beep, msg_info.sampling_rate,
                                10.0, 0.5 ).astype(np.uint16).tobytes()

    r = rospy.Rate( loop_rate )
    index = 0
    time_played = 0.0  # sec
    time_start = time.time()
    while not rospy.is_shutdown():
        rospy.loginfo('publish')
        # wrap around when the index reaches the end of the buffer
        if index >= len(buffer_beep):
            index = 0
        msg_info.header.stamp = rospy.Time.now()
        if ( len(buffer_beep) - index) < buffer_size:
            msg_data.data = buffer_beep[index:]
            time_played += 1.0 * ( len(buffer_beep) - index ) / ( msg_info.width * msg_info.sampling_rate )
        else:
            msg_data.data = buffer_beep[index:index+buffer_size]
            time_played += 1.0 * buffer_size / ( msg_info.width * msg_info.sampling_rate )
        time_passed = time.time() - time_start
        while time_played - time_passed > 0:
            time_passed = time.time() - time_start
            if rospy.is_shutdown():
                return
            r.sleep()
        publisher_data.publish( msg_data )
        publisher_info.publish( msg_info )
        index += buffer_size
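generateBeep is not included in the snippet above; below is a minimal sketch of a compatible helper, with the parameter meanings (amplitude, tone list, beep period, sampling rate, total duration, duty cycle) inferred from the call site rather than taken from the package itself.

import numpy as np

def generateBeep(amplitude, frequencies, beep_period, sampling_rate,
                 duration, duty):
    # Cycle through the given tones, sounding each one for the first
    # `duty` fraction of its beep period (an assumed interpretation)
    t = np.arange(int(sampling_rate * duration)) / float(sampling_rate)
    wave = np.zeros_like(t)
    for i, f in enumerate(frequencies):
        start = i * beep_period
        mask = (t >= start) & (t < start + duty * beep_period)
        wave[mask] = np.sin(2 * np.pi * f * t[mask])
    return amplitude * wave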
Example #2
 def audio_cb(self, data):
     #: :type data: AudioBuffer
     # The incoming AudioBuffer message carries: int16[] data
     rospy.loginfo("Callback received!")
     ad = AudioData()
     # AudioData carries: uint8[] data, so reinterpret each int16 sample
     # as two little-endian bytes instead of truncating it to its low byte
     data_uint8 = numpy.array(data.data, dtype=numpy.int16).view(numpy.uint8)
     ad.data = data_uint8.tolist()
     self.pub.publish(ad)
Example #3
    def sample_audio_test(self, in_data, frame_count, time_info, status):

        # Alternative using AudioData16 (int16 samples instead of raw bytes):
        #   data_int = np.frombuffer(in_data, dtype=np.int16)
        #   msg_audio = AudioData16()
        #   msg_audio.data = data_int

        msg_audio = AudioData()
        msg_audio.data = in_data
        self.raw_pub.publish(msg_audio)
       
        return None, pyaudio.paContinue
Example #4
    def tts(self, words):
        result = self.client.synthesis(words, 'zh', 1, {
            'vol': 5,
        })

        if isinstance(result, dict):
            rospy.logerr(result)
            return None
        audio_data = AudioData()
        audio_data.data = result
        return audio_data
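self.client here matches the interface of the baidu-aip speech SDK, where synthesis() returns raw audio bytes on success and an error dict on failure. A minimal sketch of how such a client is typically constructed (the credentials are placeholders, not real values):

from aip import AipSpeech

# Placeholder credentials; obtain real ones from the Baidu AI console
APP_ID = '...'
API_KEY = '...'
SECRET_KEY = '...'

# synthesis(text, language, client_type, options); 'vol' sets the volume
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)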
Example #5
 def read_from_cache(text):
     if not os.path.exists("/tmp/xiaoqiang_tts"):
         os.mkdir("/tmp/xiaoqiang_tts")
         return None
     if os.path.exists(os.path.join("/tmp/xiaoqiang_tts", text.data)):
         with open(os.path.join("/tmp/xiaoqiang_tts", text.data),
                   "rb") as audio_file:
             audio_data = AudioData()
             audio_data.data = audio_file.read()
             return audio_data
     return None
Example #6
 def on_audio(self, data, channel):
     self.pub_audios[channel].publish(AudioData(data=data))
     if channel == self.main_channel:
         self.pub_audio.publish(AudioData(data=data))
         if self.is_speeching:
             if len(self.speech_audio_buffer) == 0:
                 self.speech_audio_buffer = self.speech_prefetch_buffer
             self.speech_audio_buffer += data
         else:
             self.speech_prefetch_buffer += data
             self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
Example #7
    def stream_audio(self):
        reccmd = self.arecord_command
        self.p = subprocess.Popen(reccmd, stdout=subprocess.PIPE)

        while not rospy.is_shutdown():
            self.last_data = self.p.stdout.read(self.block_size)
            if self.last_data:  # read() returns b'' on EOF, not None
                ad = AudioData()
                ad.data = tuple(np.frombuffer(self.last_data, dtype=np.uint8))
                self.pub.publish(ad)

        self.close()
Example #8
    def run(self):
        try:
            data = self.stream.read(self.CHUNK)
        except IOError as ex:
            if ex.args[1] != pyaudio.paInputOverflowed:
                raise
            print("ERROR")
            return

        amplitude = np.frombuffer(data, np.uint8)
        print(type(amplitude))
        msg = AudioData()
        msg.data = amplitude.tolist()

        self.pub.publish(msg)
Example #9
 def read_from_cache(text):
     m = hashlib.md5()
     m.update(text.data)
     audio_filename = m.hexdigest()
     if not os.path.exists("xiaoqiang_tts"):
         os.mkdir("xiaoqiang_tts")
         return None
     if os.path.exists(os.path.join("xiaoqiang_tts", audio_filename)) and \
      os.path.getsize(os.path.join("xiaoqiang_tts", audio_filename)) > 0:
         with open(os.path.join(
                 "xiaoqiang_tts", audio_filename), "rb") as audio_file:
             audio_data = AudioData()
             audio_data.data = audio_file.read()
             return audio_data
     return None
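The matching write side of this cache is not shown; here is a sketch of what it could look like under the same directory layout (the name write_to_cache and the audio_bytes argument are assumptions):

 def write_to_cache(text, audio_bytes):
     m = hashlib.md5()
     m.update(text.data)
     audio_filename = m.hexdigest()
     if not os.path.exists("xiaoqiang_tts"):
         os.mkdir("xiaoqiang_tts")
     with open(os.path.join("xiaoqiang_tts", audio_filename),
               "wb") as audio_file:
         audio_file.write(audio_bytes)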
Example #10
 def topic_cb(self, msg):
     # Retrieve 1 channel audio data
     tmp = list(msg.data)
     tmp = tmp[0::len(msg.channelMap)]
     # Convert audio format: int16 -> little-endian uint8 byte string
     # (Python 2 only: relies on str concatenation and chr())
     dataBuff = ""
     for i in range(0, len(tmp)):
         if tmp[i] < 0:
             tmp[i] = tmp[i] + 65536
         dataBuff = dataBuff + chr(tmp[i] % 256)
         dataBuff = dataBuff + chr((tmp[i] - (tmp[i] % 256)) // 256)
     # Publish audio topic
     audio_data = AudioData()
     audio_data.data = dataBuff
     self.audiodata_pub.publish(audio_data)
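The chr() loop above builds the little-endian byte string one sample at a time and only works on Python 2. A sketch of an equivalent vectorized conversion with numpy (assuming numpy is imported as np):

 def topic_cb(self, msg):
     # Keep one channel, then reinterpret the int16 samples as raw
     # little-endian bytes in a single step
     samples = np.array(msg.data[0::len(msg.channelMap)], dtype='<i2')
     audio_data = AudioData()
     audio_data.data = samples.tobytes()
     self.audiodata_pub.publish(audio_data)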
Example #11
def main():
    rospy.init_node('AudioCapturer', anonymous=True)
    publisher = rospy.Publisher("rawAudioChunk", AudioData, queue_size=20)

    p = pyaudio.PyAudio()
    stream = p.open(
        input_device_index=None,  # Default device
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK_SIZE)
    print("*Recording*")

    # Loop while the node is alive and the audio stream is working. Nothing
    # like spin() or a rate sleep is needed here: stream.read() blocks while
    # waiting for IO (presumably putting the thread to sleep), and there are
    # no ROS callbacks to service.
    while stream.is_active() and not rospy.is_shutdown():
        try:
            in_data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            publisher.publish(AudioData(data=in_data))
        except IOError as e:
            print("I/O error({0}): {1}".format(e.errno, e.strerror))
            # break

    if not stream.is_active():
        print("Stream was not active.")

    stream.stop_stream()
    stream.close()
    p.terminate()
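This example relies on module-level names that are not shown; a plausible preamble follows, with the concrete values being assumptions rather than the original author's settings:

import pyaudio
import rospy
from audio_common_msgs.msg import AudioData

FORMAT = pyaudio.paInt16  # 16-bit samples (assumed)
CHANNELS = 1              # mono capture (assumed)
RATE = 16000              # sampling rate in Hz (assumed)
CHUNK_SIZE = 1024         # frames per buffer (assumed)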
Example #12
  def onInitialize(self):
    #rospy.loginfo(self._properties.getProperty("type_name") + " version " + self._properties.getProperty("version"))
    rospy.loginfo("Copyright (C) 2010-2011 Yosuke Matsusaka")
    rospy.loginfo("Copyright (C) 2017 Isao Hara")
    rospy.loginfo("Copyright (C) 2020 Isao Hara")
    self._prevtime = time.time()  # time.clock() was removed in Python 3.8

    # configuration parameters
    self._samplerate = [16000,]
    self.bindParameter("rate", self._samplerate, 16000)
    self._character = ["male",]
    self.bindParameter("character", self._character, "male")

    self._sampling_rate = [0,]
    self.bindParameter("sampling_rate", self._sampling_rate, 0)

    # create inport
    self._tts_sub = rospy.Subscriber('/tts/data', String, self.onData)

    # create outport for wave data
    self._audio_out = rospy.Publisher('/audio_play/audio', AudioData, queue_size=1)
    self._outdata = AudioData()

    self._status = "ready"
    self._is_active = False
    return True
Example #13
 def publish(self):
     # Input Data
     input_data = self.audio_data_buffer[:self.length_queue_popup]
     self.audio_data_buffer = self.audio_data_buffer[self.length_queue_popup:]
     try:
         is_speech = self._vad.is_speech(
             input_data, self._audio_info.sample_rate)
     except Exception as e:
         rospy.logerr('Got an error while processing. {}'.format(e))
         return
     self._pub_is_speech.publish(Bool(is_speech))
     if self._current_speaking and is_speech:
         self._speech_audio_buffer += input_data
     elif not self._current_speaking and is_speech:
         self._speech_audio_buffer = input_data
         self._current_speaking = True
     elif self._current_speaking and not is_speech:
         self._speech_audio_buffer += input_data
         speech_duration = (len(self._speech_audio_buffer) /
                            2.0) / self._audio_info.sample_rate
         if speech_duration > self._minimum_duration:
             self._pub_speech_audio.publish(
                 AudioData(self._speech_audio_buffer))
         else:
             rospy.logwarn(
                 'speech duration: {} dropped'.format(speech_duration))
         self._current_speaking = False
         self._speech_audio_buffer = None
         rospy.logwarn('Cleared speech_audio_buffer.')
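self._vad with its is_speech(frame, sample_rate) call matches the API of the webrtcvad package; a minimal sketch of setting up such a detector (the aggressiveness mode is an assumed choice):

import webrtcvad

vad = webrtcvad.Vad(2)  # mode 0-3; higher filters non-speech more aggressively
# webrtcvad expects 10/20/30 ms frames of 16-bit mono PCM
# at 8/16/32/48 kHz
frame = b'\x00\x00' * 160  # 10 ms of silence at 16 kHz
print(vad.is_speech(frame, 16000))  # -> False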
Example #14
    def on_timer(self, event):
        stamp = event.current_real or rospy.Time.now()
        is_voice = self.respeaker.is_voice()
        doa_rad = math.radians(self.respeaker.direction - 180.0)
        doa_rad = angles.shortest_angular_distance(
            doa_rad, math.radians(self.doa_yaw_offset))
        doa = math.degrees(doa_rad)

        # vad
        if is_voice != self.prev_is_voice:
            self.pub_vad.publish(Bool(data=is_voice))
            self.prev_is_voice = is_voice

        # doa
        if doa != self.prev_doa:
            self.pub_doa_raw.publish(Int32(data=doa))
            self.prev_doa = doa

            msg = PoseStamped()
            msg.header.frame_id = self.sensor_frame_id
            msg.header.stamp = stamp
            ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
            msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
            msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
            msg.pose.orientation.w = ori[0]
            msg.pose.orientation.x = ori[1]
            msg.pose.orientation.y = ori[2]
            msg.pose.orientation.z = ori[3]
            self.pub_doa.publish(msg)

        if self.asr_engine != "silent":
            #actual cutting of the "speech" only audio for sending to asr service
            # get timestamp of "last time mic array detected voice"
            if is_voice and not self.pepper_is_speaking:
                self.speech_stopped = stamp

            #pepper is speaking: we go to cut (correct)
            #pepper is not speaking:
            #we are past the grace period in speaking:
            #second statement fails, we go to cutting (correct)
            #we are not past the grace period in speaking:
            #second statement is true, we don't cut off audio and go on with is_speaking = True (correct)
            if not self.pepper_is_speaking and (stamp - self.speech_stopped <
                                                rospy.Duration(
                                                    self.speech_continuation)):
                self.is_speaking = True  #grace period "speech_continuation where we just continue 'is speaking'"

            elif self.is_speaking:  #otherwise there has been no speech for too long, and we were obviously speaking, so we cut the audio here
                buf = self.speech_audio_buffer
                self.speech_audio_buffer = str()
                self.is_speaking = False
                duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
                duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
                rospy.loginfo("Speech detected for %.3f seconds" % duration)
                if self.speech_min_duration <= duration < self.speech_max_duration:

                    self.pub_speech_audio.publish(AudioData(data=buf))
Example #15
 def on_audio(self, data):
     self.pub_audio.publish(AudioData(data=data))
     if self.is_speeching:
         if len(self.speech_audio_buffer) == 0:
             self.speech_audio_buffer = self.speech_prefetch_buffer
         self.speech_audio_buffer += data
     else:
         self.speech_prefetch_buffer += data
         self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
Example #16
    def read_from_cache(text):
        global locale
        m = hashlib.md5()
        m.update(text.data)
        audio_filename = m.hexdigest()
        if not os.path.exists("xiaoqiang_tts"):
            os.mkdir("xiaoqiang_tts")
            return None
        # Backwards compatibility with earlier versions
        if os.path.exists(os.path.join("xiaoqiang_tts", audio_filename)) and \
                os.path.getsize(os.path.join("xiaoqiang_tts", audio_filename)) > 0 and locale == "zh-cn":
            with open(os.path.join("xiaoqiang_tts", audio_filename),
                      "rb") as audio_file:
                audio_data = AudioData()
                audio_data.data = audio_file.read()
                return audio_data

        if not os.path.exists(os.path.join("xiaoqiang_tts", locale)):
            os.mkdir(os.path.join("xiaoqiang_tts", locale))
            return None

        # Read from the locale-specific path
        if os.path.exists(os.path.join("xiaoqiang_tts", locale, audio_filename)) and \
                os.path.getsize(os.path.join("xiaoqiang_tts", locale, audio_filename)) > 0:
            with open(os.path.join("xiaoqiang_tts", locale, audio_filename),
                      "rb") as audio_file:
                audio_data = AudioData()
                audio_data.data = audio_file.read()
                return audio_data
        return None
Example #17
    def on_audio(self, data):
        speech_data = None
        for i, ch in enumerate(self.channels):
            chan_data = data[:, ch].tostring()
            if speech_data is None:  # keep the first channel as speech data
                speech_data = chan_data
            self.pub_audio[i].publish(AudioData(data=chan_data))

        if self.is_speeching:
            if len(self.speech_audio_buffer) == 0:
                self.speech_audio_buffer = self.speech_prefetch_buffer
            self.speech_audio_buffer += speech_data
        else:
            self.speech_prefetch_buffer += speech_data
            self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
Example #18
    def on_timer(self, event):
        stamp = event.current_real or rospy.Time.now()
        is_voice = self.respeaker.is_voice()
        doa_rad = math.radians(self.respeaker.direction - 180.0)
        doa_rad = angles.shortest_angular_distance(
            doa_rad, math.radians(self.doa_yaw_offset))
        doa = math.degrees(doa_rad)

        # vad
        if is_voice != self.prev_is_voice:
            self.pub_vad.publish(Bool(data=is_voice))
            self.prev_is_voice = is_voice

        # doa
        if doa != self.prev_doa:
            self.pub_doa_raw.publish(Int32(data=doa))
            self.prev_doa = doa

            msg = PoseStamped()
            msg.header.frame_id = self.sensor_frame_id
            msg.header.stamp = stamp
            ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
            msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
            msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
            msg.pose.orientation.w = ori[0]
            msg.pose.orientation.x = ori[1]
            msg.pose.orientation.y = ori[2]
            msg.pose.orientation.z = ori[3]
            self.pub_doa.publish(msg)

        # speech audio
        if is_voice:
            self.speech_stopped = stamp
        if stamp - self.speech_stopped < rospy.Duration(
                self.speech_continuation):
            self.is_speeching = True
        elif self.is_speeching:
            buf = self.speech_audio_buffer
            self.speech_audio_buffer = str()
            self.is_speeching = False
            duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
            rospy.loginfo("Speech detected for %.3f seconds" % duration)
            if self.speech_min_duration <= duration < self.speech_max_duration:
                self.pub_speech_audio.publish(AudioData(data=buf))
Example #19
    def onInitialize(self):
        rospy.loginfo("GoogleTextToSpeechRos version " + __version__)
        rospy.loginfo("Copyright (C) 2019 Isao Hara")

        #
        # create inport for the text to be synthesized
        self._inport = rospy.Subscriber('/tts/data', String, self.onData)

        #
        # create outport for result
        self._outdata = AudioData()
        self._outport = rospy.Publisher('/audio_play/audio',
                                        AudioData,
                                        queue_size=10)

        #
        #
        self.show_copyrights()
        return True
Example #20
    def send_audio(self):

        # Dump the accumulated buffer to a 16 kHz, mono, 16-bit WAV file
        with contextlib.closing(wave.open("test.wav", 'wb')) as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(16000)
            wf.writeframes(bytearray(self.tmp_buff))

        # Read the file back and publish it as a sequence of uint8 values
        # (list(bytearray(...)) works on both Python 2 and 3)
        with open("test.wav", 'rb') as wf:
            ba = wf.read()
        tmp = list(bytearray(ba))
        self.pub_audio.publish(AudioData(tmp))

        self.tmp_buff = None
Example #21
import time

import numpy as np
import rospy


def main():

    rospy.init_node('colored_noise_publisher')

    # parameters
    loop_rate = int(rospy.get_param('~loop_rate', 100))
    buffer_size = int(rospy.get_param('~buffer_size', 4096))
    use_audio_common = bool(rospy.get_param('~use_audio_common', False))
    beta = float(rospy.get_param('~exponent', 1))
    level = float(rospy.get_param('~level', 0.5))

    # import message modules
    if use_audio_common:
        from audio_common_msgs.msg import AudioData
        from audio_stream_msgs.msg import AudioInfo
    else:
        from audio_stream_msgs.msg import AudioData, AudioInfo

    # publishers
    publisher_data = rospy.Publisher('~audio', AudioData, queue_size=1000)
    publisher_info = rospy.Publisher('~info', AudioInfo, queue_size=10)

    # gen a default buffer
    msg_info = AudioInfo()
    msg_data = AudioData()
    msg_info.channels = 1
    msg_info.sampling_rate = 16000
    msg_info.width = 2
    A = level * ((2**8)**2)
    # generateColoredNoise is a helper defined elsewhere in this package
    # (see the sketch after this example)
    buffer_wave = generateColoredNoise(beta, A, msg_info.sampling_rate,
                                       30).astype(np.uint16).tobytes()

    r = rospy.Rate(loop_rate)
    index = 0
    time_played = 0.0  # sec
    time_start = time.time()
    while not rospy.is_shutdown():

        rospy.loginfo('publish')

        # update msg_info header
        msg_info.header.stamp = rospy.Time.now()

        # fill msg_data.data
        if (len(buffer_wave) - index) < buffer_size:
            msg_data.data = buffer_wave[index:] + buffer_wave[:index +
                                                              buffer_size -
                                                              len(buffer_wave)]
            time_played += 1.0 * (len(buffer_wave) - index) / (
                msg_info.width * msg_info.sampling_rate)
            index = index + buffer_size - len(buffer_wave)
        else:
            msg_data.data = buffer_wave[index:index + buffer_size]
            time_played += 1.0 * buffer_size / (msg_info.width *
                                                msg_info.sampling_rate)
            index = index + buffer_size

        time_passed = time.time() - time_start
        while time_played > time_passed:
            time_passed = time.time() - time_start
            if rospy.is_shutdown():
                return
            r.sleep()

        # publish them
        publisher_data.publish(msg_data)
        publisher_info.publish(msg_info)
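generateColoredNoise is defined elsewhere in the package; below is a minimal sketch of a 1/f^beta noise generator with a compatible signature. The spectral-shaping approach is an assumption, not the package's actual implementation:

import numpy as np

def generateColoredNoise(beta, amplitude, sampling_rate, duration):
    # Shape white noise in the frequency domain by 1/f^(beta/2),
    # which gives a 1/f^beta power spectrum
    n = int(sampling_rate * duration)
    spectrum = np.fft.rfft(np.random.randn(n))
    freqs = np.fft.rfftfreq(n, d=1.0 / sampling_rate)
    freqs[0] = freqs[1]  # avoid dividing by zero at DC
    noise = np.fft.irfft(spectrum / freqs ** (beta / 2.0), n)
    return amplitude * noise / np.max(np.abs(noise))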
Example #22
 def on_raw_audio(self, data, mic):
     pub = self.raw_audio_pubs[mic]
     pub.publish(AudioData(data=data))
Example #23
 def __init__(self):
     self.sub_audio = rospy.Subscriber('/audio', AudioData, self.audioCB)
     self.audio = AudioData()
Example #24
import argparse
import pickle
import time

import numpy as np
import rospy
from audio_common_msgs.msg import AudioData
from pyhelper_fns.vis_utils import MyAnimationMulti

def gray_rgb(image):
    # Stack the single gray channel three times to fake an RGB image
    image = np.expand_dims(image, 2)
    return np.concatenate([image, image, image], axis=2)
vis_tool = MyAnimationMulti(None, numPlots=6, isIm = np.ones(6))

if __name__ == '__main__':
    
    rospy.init_node('traj_replay_audio', anonymous = True)
    pub = rospy.Publisher('replay_audio/audio', AudioData, queue_size = 10)
    msg = AudioData()
    
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str,
                        help='path to pickle file')
                        
    args = parser.parse_args()
    FILE_NAME = args.path
    
    with open(FILE_NAME, "rb") as handle:
        data_dict = pickle.load(handle)
    states = data_dict["state_list"]
    actions = data_dict["action_list"]
    images = data_dict["image_list"]
    audio = data_dict["audio_list"]
    print("len of states{} len of actions{} len of images{}".format(len(states), len(actions), len(images)))
Example #25
import sys
import time
import wave

import rospy


def main():

    rospy.init_node( 'audio_stream_publisher' )

    # parameters
    loop_rate = rospy.get_param( '~loop_rate', 100 )
    is_loop = rospy.get_param( '~is_loop', False )
    filename = rospy.get_param( '~filename' )
    buffer_size = rospy.get_param( '~buffer_size', 4096 )
    max_precede = rospy.get_param( '~max_precede', 10.0 )
    use_audio_common = rospy.get_param( '~use_audio_common', False )
    topicname_data = '~audio'
    topicname_info = '~info'

    # import message modules
    if use_audio_common:
        from audio_common_msgs.msg import AudioData
        from audio_stream_msgs.msg import AudioInfo
    else:
        from audio_stream_msgs.msg import AudioData, AudioInfo

    # publishers
    publisher_data = rospy.Publisher( topicname_data, AudioData, queue_size=1 )
    publisher_info = rospy.Publisher( topicname_info, AudioInfo, queue_size=1 )

    # message
    msg_data = AudioData()
    msg_info = AudioInfo()

    # open wavefile
    wavefile = wave.open( filename, 'r' )
    wave_channels = wavefile.getnchannels()
    wave_width = wavefile.getsampwidth()
    wave_framerate = wavefile.getframerate()
    wave_framenum = wavefile.getnframes()
    wave_data = wavefile.readframes( wave_framenum )

    infostr = (
        '== Wave file info ==\n'
        'channels: {}\n'
        'sample size [byte]: {}\n'
        'sampling rate [hz]: {}\n'
        'number of frames: {} ( {} secs )').format(
            wave_channels, wave_width, wave_framerate, wave_framenum,
            wave_framenum * 1.0 / wave_framerate )
    rospy.loginfo( infostr )

    if wave_channels != 1 or not ( wave_width == 1 or wave_width == 2):
        rospy.logerr( 'Currently 8bit or 16bit monaural wave files are supported.' )
        sys.exit(1)
    if buffer_size % wave_width > 0:
        rospy.logerr( 'buffer_size must be a multiple of wave_width' )
        sys.exit(1)

    msg_info.channels = wave_channels
    msg_info.sampling_rate = wave_framerate
    msg_info.width = wave_width
    msg_info.header.stamp = rospy.Time.now()
    publisher_info.publish( msg_info )

    r = rospy.Rate( loop_rate )
    if is_loop:
        time_played = 0.0  # sec
        time_start = time.time()
        index = 0
        while not rospy.is_shutdown():
            # wrap around when index runs past the end of wave_data
            # (index counts bytes, so compare against len(wave_data),
            # not the frame count)
            if index >= len(wave_data):
                index = 0
            # preparing msg data
            msg_info.header.stamp = rospy.Time.now()
            if ( len(wave_data) - index ) < buffer_size:
                msg_data.data = wave_data[index:]
                time_played += 1.0 * ( len(wave_data) - index ) / ( wave_width * wave_framerate )
            else:
                msg_data.data = wave_data[index:index+buffer_size]
                time_played += 1.0 * buffer_size / ( wave_width * wave_framerate )
            # sleep if played time is ahead of passed time
            time_passed = time.time() - time_start
            while time_played - time_passed > max_precede:
                time_passed = time.time() - time_start
                if rospy.is_shutdown():
                    return
                r.sleep()
            # publishing data
            publisher_data.publish( msg_data )
            publisher_info.publish( msg_info )
            # debug print
            rospy.logdebug( 'time played: {}, time passed {}'.format( time_played, time_passed ) )
            # increase index
            index += buffer_size
    else:
        time_played = 0.0  # sec
        time_start = time.time()
        index = 0
        while not rospy.is_shutdown():
            # stop when index runs past the end of wave_data
            # (index counts bytes, so compare against len(wave_data),
            # not the frame count)
            if index >= len(wave_data):
                break
            # preparing msg data
            msg_info.header.stamp = rospy.Time.now()
            if ( len(wave_data) - index ) < buffer_size:
                msg_data.data = wave_data[index:]
                time_played += 1.0 * ( len(wave_data) - index ) / ( wave_width * wave_framerate )
            else:
                msg_data.data = wave_data[index:index+buffer_size]
                time_played += 1.0 * buffer_size / ( wave_width * wave_framerate )
            # sleep if played time is ahead of passed time
            time_passed = time.time() - time_start
            while time_played - time_passed > max_precede:
                time_passed = time.time() - time_start
                if rospy.is_shutdown():
                    return
                r.sleep()
            # publishing data
            publisher_data.publish( msg_data )
            publisher_info.publish( msg_info )
            # debug print
            rospy.logdebug( 'time played: {}, time passed {}'.format( time_played, time_passed ) )
            # increase index
            index += buffer_size
Example #26
 def publishAudio(self):
     if self.chunk_count > self.MIN_CHUNKS_AUDIO_LENGTH:
         self.publisher.publish(AudioData(self.voiced_frames))
     self.discardAudio()
Example #27
    def on_timer(self, event):
        stamp = event.current_real or rospy.Time.now()
        is_voice = self.respeaker.is_voice()
        doa_rad = math.radians(self.respeaker.direction - 180.0)
        doa_rad = angles.shortest_angular_distance(
            doa_rad, math.radians(self.doa_yaw_offset))
        doa = math.degrees(doa_rad)

        if self.is_active:
            self.wait_command_count += 1
            if self.wait_command_count > self.wait_command_thres:
                self.is_active = False
                self.respeaker.set_led_off()
        
        if self.is_waiting_response:
            self.wait_response_count += 1
            if (self.wait_response_count > self.wait_response_thres):
                self.is_waiting_response = False
                self.respeaker.set_led_off()

        # vad
        # if is_voice != self.prev_is_voice:
        #     self.pub_vad.publish(Bool(data=is_voice))
        #     self.prev_is_voice = is_voice

        # doa
        if doa != self.prev_doa:
            self.pub_doa_raw.publish(Int32(data=doa))
            self.prev_doa = doa

            # msg = PoseStamped()
            # msg.header.frame_id = self.sensor_frame_id
            # msg.header.stamp = stamp
            # ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
            # msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
            # msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
            # msg.pose.orientation.w = ori[0]
            # msg.pose.orientation.x = ori[1]
            # msg.pose.orientation.y = ori[2]
            # msg.pose.orientation.z = ori[3]
            # self.pub_doa.publish(msg)

        # speech audio
        if is_voice:
            self.speech_stopped = stamp
        if stamp - self.speech_stopped < rospy.Duration(self.speech_continuation):
            self.is_speeching = True
        elif self.is_speeching:
            buf = self.speech_audio_buffer
            self.speech_audio_buffer = str()
            self.is_speeching = False
            duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
            rospy.loginfo("Speech detected for %.3f seconds" % duration)
            status = self.detector.RunDetection(buf)
            if status > 0:
                print("found kws")
                self.is_active = True
                self.wait_command_count = 0
                self.respeaker.set_led_listen()
                return
            else:
                print("not kws")
                # self.respeaker.set_led_trace()
            if self.is_active:
                self.is_active = False
                if self.speech_min_duration <= duration < self.speech_max_duration:
                    self.respeaker.set_led_think()
                    self.is_waiting_response = True
                    self.wait_response_count = 0
                    self.pub_speech_audio.publish(AudioData(data=buf))
Example #28
    def subscribe(self):
        pass

    def unsubscribe(self):
        pass


if __name__ == '__main__':
    rospy.init_node('audio_capture_microphone')
    s = SPH0645Audio()
    r = rospy.Rate(float(s.rate) / s.frame_per_buffer)
    signal.signal(signal.SIGINT, s.kill)
    while s.stream.is_active():
        if s._connection_status is not True:
            # If not subscribed, only read stream to avoid 'IOError: [Errno -9981] Input overflowed'
            # https://stackoverflow.com/questions/10733903/pyaudio-input-overflowed
            in_data = np.frombuffer(s.stream.read(1024, exception_on_overflow=False), np.int32)
            r.sleep()
            continue
        # Input data to 16 bit: the SPH0645 delivers 18-bit samples
        # left-justified in 32-bit frames
        in_data = np.frombuffer(s.stream.read(1024), np.int32)
        in_data = in_data >> 14  # now the raw 18-bit integer from the microphone
        int16_data = (in_data >> 2).astype(np.int16)  # drop 2 LSBs -> 16 bit
        # Retrieve 1 channel data
        channel_data = int16_data[s.channel::s.channels]
        # Publish audio topic
        s.pub_audio.publish(AudioData(data=channel_data.tobytes()))
        r.sleep()
Example #29
 def on_audio(self, data):
     self.pub_audio.publish(AudioData(data=data))
Example #30
#
# Author: Randoms
#

import rospy
from audio_common_msgs.msg import AudioData
from engines.baidu_tts import BaiduTTS as bd_client
from engines.xunfei_tts import XunfeiTTS as xf_client
from std_msgs.msg import String, Bool
import time
import struct
import numpy
import matplotlib.pyplot as plt
from scipy import signal

AUDIO_CACHE = AudioData()
CURRENT_AUDIO = AudioData()
NEW_AUDIO_FLAG = False
MIN_VOLUM = 2000
AUDIO_STATUS = False

b, a = signal.butter(3, 0.10, 'high')


def is_end(audio_data):
    last_two_seconds = audio_data.data[2 * -16000 * 2:]
    if len(last_two_seconds) < 2 * 16000 * 2:
        return False
    last_two_seconds = [float(struct.unpack("<h", "".join(
        last_two_seconds[2*x:2*x + 2]))[0]) for x in range(0, 2 * 16000)]
    sf = signal.filtfilt(b, a, last_two_seconds)
Example #31
        # ... (start of this snippet is truncated in the source) ...
        sounds = sounds[0]
        positions = positions[0]

        rate = rospy.Rate(MSG_FREQ)
        sound = np.int16(sounds.mean(0) * 32768)

        delete_all = Marker()
        delete_all.action = Marker.DELETEALL
        delete_all.ns = 'sound_sources'
        delete_all = MarkerArray(markers=[delete_all])
        
        for t in range((sound.shape[0] + frames_per_msg - 1) // frames_per_msg):
            begin = t * frames_per_msg
            end = min((t + 1) * frames_per_msg, sound.shape[0])
            audio_pub.publish(AudioData(data=sound[begin:end].tobytes()))

            markers = MarkerArray()
            for i, (snd, pos) in enumerate(zip(sounds, positions)):
                scale = np.abs(snd[begin:end]).max()
                if scale == 0.:
                    continue
                marker = Marker()
                marker.header.stamp = rospy.Time.now()
                marker.header.frame_id = 'base_footprint'
                marker.type = Marker.SPHERE
                marker.ns = 'sound_sources'
                marker.id = i
                marker.pose.position.x = (pos[0] - MAP_SIZE/2. + .5) * resolution
                marker.pose.position.y = (pos[1] - MAP_SIZE/2. + .5) * resolution
                marker.pose.orientation.w = 1.
Example #32
 def callback(self, data):
     rospy.loginfo("Callback received!")
     ad = AudioData()
     # Assuming the incoming samples are int16 (as in Example #2),
     # reinterpret each one as two bytes instead of truncating it
     data_uint8 = numpy.array(data.data, dtype=numpy.int16).view(numpy.uint8)
     ad.data = data_uint8.tolist()
     self.pub.publish(ad)