Example #1
0
def rec_wav_file(output_dir, wav_path):
    """ Recognise speech in a wav file and profile speech recognition.

    The decoding and ASR output extraction times are estimated.

    Relies on the module-level ``cfg`` (Alex configuration) and ``asr``
    (speech recogniser) objects.

    Args:
        output_dir (str): Directory where the recognition lattice is saved
            (skipped when the recogniser does not expose lattices)
        wav_path (str): Path to Wave file which is recognised

    Returns:
        Tuple of decoded ASR hypothesis, time of decoding, time of hypothesis extraction
    """
    pcm = load_wav(cfg, wav_path)
    frame = Frame(pcm)

    start = time.time()
    asr.rec_in(frame)
    rec_in_end = time.time()
    res = asr.hyp_out()
    hyp_out_end = time.time()

    # Not every ASR backend provides lattices; skip silently if unsupported.
    try:
        save_lattice(asr.get_last_lattice(), output_dir, wav_path)
    except AttributeError:
        pass

    asr.flush()

    return res, rec_in_end - start, hyp_out_end - rec_in_end
Example #2
0
    def read_write_audio(self):
        """Send as much possible of the available data to the output and read as much as possible from the input.

        It should be a non-blocking operation.

        A queued frame is played only when the memory player has room for a
        whole frame; recorded audio is forwarded only while recording is on.
        """

        if (self.local_audio_play
                and (self.mem_player.get_write_available() >
                     self.cfg['Audio']['samples_per_frame'] * 2)):
            # send a frame from input to be played
            data_play = self.local_audio_play.popleft()

            if self.audio_playing and isinstance(data_play, Frame):
                # Only forward complete frames (2 bytes per 16-bit sample).
                if len(data_play
                       ) == self.cfg['Audio']['samples_per_frame'] * 2:
                    self.last_frame_id = self.mem_player.put_frame(
                        data_play.payload)
                    self.cfg['Logging']['session_logger'].rec_write(
                        self.audio_playing, data_play.payload)

            elif isinstance(data_play, Command):
                if data_play.parsed['__name__'] == 'utterance_start':
                    self.audio_playing = data_play.parsed['fname']
                    self.message_queue.append((Command(
                        'play_utterance_start(user_id="{uid}",fname="{fname}")'
                        .format(uid=data_play.parsed['user_id'],
                                fname=data_play.parsed['fname']), 'VoipIO',
                        'HUB'), self.last_frame_id))
                    try:
                        if data_play.parsed['log'] == "true":
                            self.cfg['Logging']['session_logger'].rec_start(
                                "system", data_play.parsed['fname'])
                    except SessionLoggerException as e:
                        self.cfg['Logging']['system_logger'].exception(e)

                if self.audio_playing and data_play.parsed[
                        '__name__'] == 'utterance_end':
                    self.audio_playing = None
                    # BUG FIX: the fname value was missing its closing quote
                    # (fname="{fname}) ), which produced a malformed
                    # play_utterance_end command string.
                    self.message_queue.append((Command(
                        'play_utterance_end(user_id="{uid}",fname="{fname}")'.
                        format(uid=data_play.parsed['user_id'],
                               fname=data_play.parsed['fname']), 'VoipIO',
                        'HUB'), self.last_frame_id))
                    try:
                        if data_play.parsed['log'] == "true":
                            self.cfg['Logging']['session_logger'].rec_end(
                                data_play.parsed['fname'])
                    except SessionLoggerException as e:
                        self.cfg['Logging']['system_logger'].exception(e)

        if (self.mem_capture.get_read_available() >
                self.cfg['Audio']['samples_per_frame'] * 2):
            # Get and send recorded data, it must be read at the other end.
            data_rec = self.mem_capture.get_frame()

            # send the audio only if the call is connected
            # ignore any audio signal left after the call was disconnected
            if self.audio_recording:
                self.audio_record.send(Frame(data_rec))
Example #3
0
    def send_wav(self, filename, stream=None):
        """Send the given wav file to the dialogue system as if it was said
        through the microphone.

        The audio is sent frame by frame; a short run of silence frames is
        appended afterwards so that the VAD recognises the end of recording.
        """
        frame_bytes = 2 * self.cfg['Audio']['samples_per_frame']

        # load wav and chop it into frame-sized chunks
        audio_data = load_wav(self.cfg, filename)
        for chunk in various.split_to_bins(audio_data, frame_bytes):
            if stream is not None:
                stream.write(chunk)
            self.audio_record.send(Frame(chunk))

        # trailing silence so that VAD recognizes end of recording
        silence = b"\x00\x00" * self.cfg['Audio']['samples_per_frame']
        for _ in range(10):
            self.audio_record.send(Frame(silence))
Example #4
0
    def on_client_message_received(self, payload):
        """Process a serialized ClientToAlex message received from the client."""
        message = ClientToAlex()
        message.ParseFromString(payload)

        # Ignore messages that do not carry the expected session key.
        if message.key != self.key:
            return

        self.audio_record.send(Frame(message.speech))
        self.update_current_utterance_id(message.currently_playing_utterance)
Example #5
0
File: tts.py  Project: tkraut/alex
    def synthesize(self, user_id, text, log="true"):
        """Synthesize the given text and stream the audio to the audio output.

        Wraps the synthesized frames in tts_start/tts_end commands towards
        the hub and utterance_start/utterance_end commands towards the audio
        output.  The text is split into segments which are synthesized one
        by one, with generated silence between non-final segments.

        Args:
            user_id: identifier of the addressed user
            text (str): the text to synthesize
            log (str): "true" if the utterance should be logged
        """
        if text == "_silence_" or text == "silence()":
            # just let the TTS generate an empty wav
            # BUG FIX: this line used '==' (comparison) instead of '=', so
            # the silence marker itself was synthesized out loud.
            text = ""

        timestamp = datetime.now().strftime('%Y-%m-%d--%H-%M-%S.%f')
        fname = 'tts-{stamp}.wav'.format(stamp=timestamp)

        self.commands.send(
            Command(
                'tts_start(user_id="%s",text="%s",fname="%s")' %
                (user_id, text, fname), 'TTS', 'HUB'))
        self.audio_out.send(
            Command(
                'utterance_start(user_id="%s",text="%s",fname="%s",log="%s")' %
                (user_id, text, fname, log), 'TTS', 'AudioOut'))

        segments = self.parse_into_segments(text)

        for i, segment_text in enumerate(segments):
            segment_wav = self.tts.synthesize(segment_text)
            segment_wav = self.remove_start_and_final_silence(segment_wav)
            if i < len(segments) - 1:
                # add silence only for non-final segments
                segment_wav += self.gen_silence()

            # NOTE: the original code also accumulated the segments in an
            # unused local list; that dead code was removed.
            segment_wav = various.split_to_bins(
                segment_wav, 2 * self.cfg['Audio']['samples_per_frame'])

            for frame in segment_wav:
                self.audio_out.send(Frame(frame))

        self.commands.send(
            Command(
                'tts_end(user_id="%s",text="%s",fname="%s")' %
                (user_id, text, fname), 'TTS', 'HUB'))
        self.audio_out.send(
            Command(
                'utterance_end(user_id="%s",text="%s",fname="%s",log="%s")' %
                (user_id, text, fname, log), 'TTS', 'AudioOut'))
Example #6
0
 def rec_wav_file(self, wav_path):
     """Recognise the speech in the given wav file and return the hypothesis."""
     audio_pcm = load_wav(self.cfg, wav_path)
     result = self.rec_wave(Frame(audio_pcm))
     # Reset the recogniser so it is ready for the next utterance.
     self.flush()
     return result
Example #7
0
    # Actively call a number configured.
    # vio_commands.send(Command('make_call(destination="sip:4366@SECRET:5066")', 'HUB', 'VoipIO'))

    # Main polling loop: play queued wav frames, drain recorded audio, and
    # react to VoipIO commands.  Bounded by max_count iterations.
    count = 0
    max_count = 50000
    wav = None  # frames to play; loaded once a call is established

    while count < max_count:
        time.sleep(cfg['Hub']['main_loop_sleep_time'])
        count += 1

        # write one frame into the audio output
        if wav:
            data_play = wav.pop(0)
            #print len(wav), len(data_play)
            vio_play.send(Frame(data_play))

        # read all recorded audio
        # NOTE(review): data_rec is received but never used below —
        # presumably the pipe is drained so it does not fill up; confirm.
        if vio_record.poll():
            data_rec = vio_record.recv()

        # read all messages from VoipIO
        if vio_commands.poll():
            command = vio_commands.recv()

            if isinstance(command, Command):
                # When a call is established (incoming or outgoing), load
                # the test wav so the loop above starts playing it.
                if command.parsed[
                        '__name__'] == "incoming_call" or command.parsed[
                            '__name__'] == "make_call":
                    wav = audio.load_wav(cfg, './resources/test16k-mono.wav')
                    # split audio into frames
Example #8
0
    def read_write_audio(self, p, stream, wf, play_buffer):
        """Send some of the available data to the output.
        It should be a non-blocking operation.

        Therefore:
          1) do not send more then play_buffer_frames
          2) send only if stream.get_write_available() is more then the frame size

        Args:
            p: unused here; presumably the PyAudio instance — confirm
            stream: audio stream providing read/write/get_write_available
            wf: wave file open for writing the interleaved stereo log
            play_buffer (list): frames already written to the output
        """
        if self.audio_play.poll():
            # Drain as many pending items as the play buffer and the output
            # stream can accept without blocking.
            while self.audio_play.poll() \
                and len(play_buffer) < self.cfg['AudioIO']['play_buffer_size'] \
                    and stream.get_write_available() > self.cfg['Audio']['samples_per_frame']:

                # send to play frames from input
                data_play = self.audio_play.recv()
                if isinstance(data_play, Frame):
                    stream.write(data_play.payload)

                    play_buffer.append(data_play)

                    if self.cfg['AudioIO']['debug']:
                        print '.',
                        sys.stdout.flush()

                elif isinstance(data_play, Command):
                    # Relay utterance boundary notifications to the hub.
                    if data_play.parsed['__name__'] == 'utterance_start':
                        self.commands.send(
                            Command('play_utterance_start()', 'AudioIO',
                                    'HUB'))
                    if data_play.parsed['__name__'] == 'utterance_end':
                        self.commands.send(
                            Command('play_utterance_end()', 'AudioIO', 'HUB'))

        else:
            # Nothing queued to play: pad the play buffer with one frame of
            # silence so playback and recording stay in lockstep.
            data_play = Frame(b"\x00\x00" *
                              self.cfg['Audio']['samples_per_frame'])

            play_buffer.append(data_play)
            if self.cfg['AudioIO']['debug']:
                print '.',
                sys.stdout.flush()

        # record one packet of audio data
        # it will be blocked until the data is recorded
        data_rec = stream.read(self.cfg['Audio']['samples_per_frame'])
        # send recorded data it must be read at the other end
        self.audio_record.send(Frame(data_rec))

        # get played audio block
        data_play = play_buffer.pop(0)

        # send played audio
        # FIXME: I should save what I am playing
        # self.audio_played.send(data_play)

        # save the recorded and played data
        # Interleave the two 16-bit mono signals byte by byte into a stereo
        # frame: recorded audio on one channel, played audio on the other.
        data_stereo = bytearray()
        for i in range(self.cfg['Audio']['samples_per_frame']):
            data_stereo.extend(data_rec[i * 2])
            data_stereo.extend(data_rec[i * 2 + 1])

            # there might not be enough data to be played
            # then add zeros
            try:
                data_stereo.extend(data_play[i * 2])
            except IndexError:
                data_stereo.extend(b'\x00')

            try:
                data_stereo.extend(data_play[i * 2 + 1])
            except IndexError:
                data_stereo.extend(b'\x00')

        wf.writeframes(data_stereo)
Example #9
0
def main(dirname, outfname, cfg, skip=0, ignore_list_file=None):
    """Recognise all wavs below `dirname` with Julius and append the
    hypotheses to `outfname`, restarting Julius whenever it crashes.

    Arguments:
        dirname -- the directory to search for WAVs
        outfname -- path towards the file to output to
        cfg -- a configuration dictionary (of the Config class)
        skip -- how many wavs to skip (default: 0)
        ignore_list_file -- a file open for reading whose lines specify path
            globs for logs that should be ignored, or None if no such file
            should be used.  The format of this file is described in
            some alex/corpustools scripts.

    """

    # Fetch relevant config arguments.
    frame_size = cfg['corpustools']['get_jasr_confnets']['frame_size']
    rt_ratio = cfg['corpustools']['get_jasr_confnets']['rt_ratio']
    # Time to sleep between frames so feeding roughly matches real time
    # (assumes 16 kHz 16-bit audio, i.e. 32000 bytes per second).
    sleep_time = rt_ratio * frame_size / 32000.

    wavs = sorted(get_wav_fnames(dirname, ignore_list_file), key=itemgetter(1))

    jul = None
    try:
        with codecs.open(outfname, 'a+', encoding='UTF-8') as outfile:
            for wav_fname, wav_id in wavs[skip:]:
                # Load the wav.
                mywav = load_wav(cfg, wav_fname)
                # Start Julius.
                if jul is None:
                    jul, grep, errfile = start_julius(cfg, on_no_context)

                # Insist on feeding all the input data to Julius, regardless of
                # how many times it crashes.
                # `exception` doubles as a retry flag: truthy means the last
                # attempt failed and the wav must be fed again.
                exception = 1
                while exception:
                    try:
                        for startidx in xrange(0, len(mywav), frame_size):
                            jul.rec_in(
                                Frame(mywav[startidx:startidx + frame_size]))
                            sleep(sleep_time)
                        # sleep(rt_ratio * len(mywav) / 32000.)
                    except socket.error as e:
                        # Julius crashing results in
                        # error: [Errno 104] Connection reset by peer
                        # Catch only that one.
                        if e.errno != 104:
                            raise e
                        exception = e
                        traceback.print_exc()
                        print "get_jasr_confnets: Restarting Julius."
                        clean_up(jul, grep, errfile)
                        jul, grep, errfile = start_julius(cfg, on_no_context)
                    else:
                        exception = None

                # Extract the hypothesis; on failure, record 'None' and
                # force a fresh Julius instance for the next wav.
                exception = None
                try:
                    hyp = jul.hyp_out()
                except ASRException as e:
                    exception = e
                except socket.error as e:
                    # Julius crashing results in
                    # error: [Errno 104] Connection reset by peer
                    # Catch only that one.
                    if e.errno != 104:
                        raise e
                    exception = e
                if exception is not None:
                    traceback.print_exc()
                    clean_up(jul, grep, errfile)
                    jul = None
                    hyp = 'None'
                    exception = None

                outfile.write('{id_} => {hyp!r}\n'.format(id_=wav_id, hyp=hyp))
                # Progress indicator: one dot per processed wav.
                sys.stderr.write('.')
                sys.stderr.flush()
    finally:
        # Always shut Julius down, even on an unhandled error.
        if jul is not None:
            clean_up(jul, grep, errfile)