Exemplo n.º 1
0
    def run(self):
        self.merger.start()

        if self.visualization:
            self.visualizer.start()

        recording_id_odas = [0, 0, 0, 0]
        last_recording_id_odas = [0, 0, 0, 0]

        recordings = {}
        # request to speaker recognition waiting to be answered, key is the id,
        # value is the queue in which the result will be stored
        sr_requests = {}

        # kevins ros changes
        pub = rospy.Publisher('/roboy/cognition/sam/output',
                              String,
                              queue_size=10)
        rospy.Subscriber("/roboy/control/matrix/leds/mode", ControlLeds,
                         self.mode_callback)
        rospy.Subscriber("/roboy/control/matrix/leds/freeze", msg_Empty,
                         self.freeze_callback)
        # s = rospy.Service('/roboy/cognition/speech/recognition', RecognizeSpeech, self.handle_service)
        # self.ledmode_pub = rospy.Publisher("/roboy/control/matrix/leds/mode/simple", Int32, queue_size=3)
        # self.ledoff_pub = rospy.Publisher('/roboy/control/matrix/leds/off', msg_Empty, queue_size=10)
        # self.ledfreeze_pub = rospy.Publisher("/roboy/control/matrix/leds/freeze", msg_Empty, queue_size=1)
        # self.ledpoint_pub = rospy.Publisher("/roboy/control/matrix/leds/point", Int32, queue_size=1)
        rospy.init_node("SAM", anonymous=True)

        # operation average
        angle_list = []

        while self.merger.is_alive() and not rospy.is_shutdown():

            # we do ask for the next data block
            # maybe this is the place where i can insert a call and replace the while loop

            # wait for/get next data
            try:
                next_data = self.merger_to_main_queue.get(block=True,
                                                          timeout=1)
            except q_Empty:
                continue  # restart loop, but check again if we maybe got a stop signal

            cid = next_data['id_info']
            caudio = next_data['audio_data']

            ############################################################################################
            # this part separates the 4 streams and manages the ones where currently audio is being recorded
            #########################################################################################
            # cid[i] = [id, x, y, z, activity]
            for i in range(len(cid)):  # len=4

                recording_id_odas[i] = cid[i][0]

                if recording_id_odas[i] > 0:
                    if recording_id_odas[i] == last_recording_id_odas[i]:
                        # same person continues speaking
                        recordings[recording_id_odas[i]].audio = np.append(
                            recordings[recording_id_odas[i]].audio, caudio[i])
                        recordings[recording_id_odas[i]].currentpos = [
                            cid[i][1], cid[i][2], cid[i][3]
                        ]

                    else:
                        # a person started speaking
                        recordings[recording_id_odas[i]] = Recording(
                            recording_id_odas[i],
                            [cid[i][1], cid[i][2], cid[i][3]])
                        recordings[recording_id_odas[i]].audio = np.append(
                            recordings[recording_id_odas[i]].audio, caudio[i])

                        # if a different person was speaking before, he is now done
                        if last_recording_id_odas[i] > 0:
                            recordings[
                                last_recording_id_odas[i]].stopped = True
                elif recording_id_odas[
                        i] == 0 and last_recording_id_odas[i] > 0:
                    # if a different person was speaking before, he is now done
                    recordings[last_recording_id_odas[i]].stopped = True

                last_recording_id_odas[i] = recording_id_odas[i]

            ##########################################################
            # check if we got any answers from sr (speaker recognition) in the meantime
            #############################################################
            to_delete_req = []
            for rec_id, req in sr_requests.iteritems():
                try:
                    # sr_id: -99 means new speaker
                    # certainty between 0-10
                    certainty = 0
                    preliminary_id, sr_id, certainty = req.get(block=False)

                    # Fuse info of speaker recognition on localization together
                    # First the best case, both agree on an is/new speker
                    if sr_id == recordings[rec_id].preliminary_speaker_id:
                        # both agree, thats nice
                        recordings[rec_id].final_speaker_id = recordings[
                            rec_id].preliminary_speaker_id
                        recordings[rec_id].send_to_trainer = True
                    elif recordings[
                            rec_id].created_new_speaker and sr_id == -99:
                        # both agree, that this is a new speaker
                        output_string = "both agree that rec %d is new speaker %d" % (
                            rec_id, recordings[rec_id].preliminary_speaker_id)
                        rospy.logdebug(output_string)
                        recordings[rec_id].final_speaker_id = recordings[
                            rec_id].preliminary_speaker_id
                        recordings[rec_id].send_to_trainer = True
                    else:

                        # Now come the harder parts.
                        if certainty < 1:
                            # if speaker recognition is unsure we rely on localization
                            recordings[rec_id].final_speaker_id = recordings[
                                rec_id].preliminary_speaker_id
                        elif certainty > 8:
                            # sr is super sure, we trust it
                            recordings[rec_id].final_speaker_id = sr_id
                            recordings[rec_id].sr_changed_speaker = True
                        else:
                            # check the angle the the speaker sr suggested, and depending on the certainty decide
                            # go through the list of speaker angles and find the one one which sr suggests
                            found = False
                            for (oth_id, angl
                                 ) in recordings[rec_id].angles_to_speakers:
                                if oth_id == sr_id:
                                    # the further we are away the shurer sr has to be
                                    if certainty * 20 > angl:
                                        recordings[
                                            rec_id].final_speaker_id = sr_id
                                        recordings[
                                            rec_id].sr_changed_speaker = True
                                    else:
                                        recordings[
                                            rec_id].final_speaker_id = recordings[
                                                rec_id].preliminary_speaker_id
                                    found = True
                                    break
                            if not found:
                                # this shouldn't happen
                                output_string = "Speaker recognition suggestested id {} for recording {}," \
                                                " which doesn't exist".format(sr_id, rec_id)
                                rospy.logerr(output_string)
                                recordings[
                                    rec_id].final_speaker_id = recordings[
                                        rec_id].preliminary_speaker_id

                    output_string = "response for req %d, results is %d, certanty %d" % (
                        rec_id, sr_id, certainty)
                    rospy.logdebug(output_string)
                    recordings[rec_id].is_back_from_sr = True
                    to_delete_req.append(rec_id)

                except q_Empty:
                    if time.time() - recordings[
                            rec_id].time_sent_to_sr > 3:  # no response from sr for 3 sec -> timeout
                        # print("no response for request %d in 3 sec -> timeout" % (rec_id))
                        recordings[rec_id].final_speaker_id = recordings[
                            rec_id].preliminary_speaker_id
                        recordings[rec_id].is_back_from_sr = True
                        to_delete_req.append(rec_id)

            for req in to_delete_req:
                del sr_requests[req]

            ##################################################################################
            # here we go through our recordings and handle them based on their current status
            ####################################################################################
            to_delete = []

            rec_info_to_vis = []

            for rec_id, rec in recordings.iteritems():
                if self.visualization and not rec.stopped:
                    # convert audio to energy and append it to the tuple
                    # Energy is the root mean square of the signal
                    # E = sqrt(sum(s[n]^2)/N)
                    curr_energy = np.sqrt(np.mean(np.square(rec.audio.data)))
                    if not rec.stopped:
                        rec_info_to_vis.append([
                            rec_id, rec.currentpos[0], rec.currentpos[1],
                            rec.currentpos[2], 200, curr_energy
                        ])  # 200 is the size of the blob
                    else:
                        rec_info_to_vis.append([
                            rec_id, rec.currentpos[0], rec.currentpos[1],
                            rec.currentpos[2], 50, curr_energy
                        ])

                if rec.new:
                    output_string = "new recording " + str(rec_id)
                    rospy.loginfo(output_string)
                    # get angles to all known speakers
                    rec.get_angles_to_all_speakers(self.speakers, rec.startpos)

                    # if it is wihthin a certain range to a known speaker, assign it to him
                    if len(
                            self.speakers
                    ) > 0 and rec.angles_to_speakers[0][1] < 35:  # degree
                        output_string = "preliminary assigning recording %d to speaker %d, angle is %d" % (
                            rec_id, rec.angles_to_speakers[0][0],
                            rec.angles_to_speakers[0][1])
                        rospy.loginfo(output_string)
                        rec.preliminary_speaker_id = rec.angles_to_speakers[0][
                            0]
                        rec.final_speaker_id = rec.preliminary_speaker_id  # this will be overwritten later

                    else:
                        # create a new speaker
                        self.num_speakers += 1
                        new_id = self.num_speakers
                        self.speakers[new_id] = Speaker(new_id, rec.startpos)
                        rec.preliminary_speaker_id = new_id
                        rec.final_speaker_id = rec.preliminary_speaker_id  # this will be overwritten later
                        rec.created_new_speaker = True
                        closest_ang = -999
                        if len(rec.angles_to_speakers) > 0:
                            closest_ang = rec.angles_to_speakers[0][1]
                        output_string = "creating new speaker %d for recording %d, closest angle is %d" % (
                            new_id, rec_id, closest_ang)
                        rospy.logdebug(output_string)

                        if self.num_speakers == 1:
                            rec.send_to_trainer = True

                    rec.new = False

                # elif self.speaker_recognition and (not rec.was_sent_sr and rec.audio.shape[
                #     0] > 16000 * 3):  # its longer than 3 sec, time to send it to speaker recognition
                #     sr_requests[rec_id] = Queue(maxsize=1)
                #     self.sr.test(rec.audio, rec.preliminary_speaker_id, sr_requests[rec_id])
                #     rec.was_sent_sr = True
                #     rec.time_sent_to_sr = time.time()

                elif rec.stopped:
                    # speaker finished, handle this
                    if not rec.alldone:
                        if rec.audio.shape[
                                0] < 16000 * 0.4:  # everything shorter than this we simply discard
                            output_string = "recording %d was too short, discarding" % (
                                rec_id)
                            print output_string
                            rospy.loginfo(output_string)
                            if rec.created_new_speaker:
                                del self.speakers[rec.preliminary_speaker_id]
                                output_string = "thus also deleting speaker" + str(
                                    rec.preliminary_speaker_id)
                                rospy.logdebug(output_string)
                            rec.alldone = True
                    if not rec.alldone:
                        if (rec.was_sent_sr and
                                rec.is_back_from_sr) or (not rec.was_sent_sr):
                            if not rec.was_sent_sr:
                                # it seems like this has been to short to be sent to
                                rec.final_speaker_id = rec.preliminary_speaker_id
                            self.speakers[
                                rec.final_speaker_id].pos = rec.currentpos

                            if rec.created_new_speaker and rec.sr_changed_speaker:
                                try:
                                    del self.speakers[
                                        rec.preliminary_speaker_id]
                                except:
                                    output_string = "Error deleting preliminary speaker " + str(
                                        rec.preliminary_speaker_id)
                                    print output_string
                                    rospy.logerr(output_string)

                            # TODO:
                            # send to speech to text
                            if self.bing_allowed:
                                text = self.stt.get_text(rec.audio)
                                # wavfile.write(text.encode('utf-8') + ".wav", 16000, rec.audio.data)
                            else:
                                text = "bing is not allowed yet"
                            # output_string = "Speaker {}: ".format(rec.final_speaker_id) + text.encode('utf-8')
                            output_string = text.encode('utf-8')
                            rospy.loginfo(output_string)
                            pub.publish(output_string)

                            if self.bing_allowed:
                                self.text_queue.put(output_string)
                                rospy.logdebug("text_queue lenght in main: " +
                                               str(self.text_queue.qsize()))

                            # send this to trainer
                            # if self.speaker_recognition and rec.send_to_trainer:
                            #     self.sr.train(rec.final_speaker_id, rec.audio)
                            #     output_string = "sending recording %d to trainer" % (rec_id)
                            #     rospy.logdebug(output_string)

                            output_string = "succesfully handeld recording " + str(
                                rec_id)
                            rospy.logdebug(output_string)
                            rec.alldone = True
                        else:
                            pass  # wait for the response of sr

                if rec.alldone:
                    to_delete.append(rec_id)

            for rec_id in to_delete:
                del recordings[rec_id]

            if self.visualization:
                try:
                    self.main_to_vis_queue.put(
                        {
                            'speakers': self.speakers,
                            'recordings': rec_info_to_vis
                        },
                        block=False)

                except Full:
                    # print("couldn't put data into visualization queue, its full")
                    pass

            # ---------------------------------------------------------------------------------------------------
            # new doa to led addon
            # print
            # print "------------------------------------"
            # print "speakers: "
            # print self.speakers
            # print "rec_info_to_vis: "
            # operation average
            # if len(rec_info_to_vis) > 0 and not self.bing_allowed:
            #     # print "0 -> ", rec_info_to_vis[0][0]
            #     # print "1 -> ", rec_info_to_vis[0][1]
            #     # print "2 -> ", rec_info_to_vis[0][2]
            #     # print "3 -> ", rec_info_to_vis[0][3]
            #     # print "4 -> ", rec_info_to_vis[0][4]
            #     angle_list.append(rec_info_to_vis[0][1])
            #     if len(angle_list) >= 10:
            #         publish_point_left_right(self.ledpoint_pub, sum(angle_list)/len(angle_list))
            #         angle_list = []
            # else:
            #     print "Empty dude"
            # print "------------------------------------"
            # print
            # publish_point(self.ledpoint_pub, rec_info_to_vis[1])
            # ---------------------------------------------------------------------------------------------------

        output_string = "SAM is done."
        print output_string
        rospy.loginfo(output_string)
        self.merger.stop()
        if self.visualization:
            self.visualizer.stop()
        rospy.signal_shutdown("SAM is done.")