Example #1
File: tts.py Project: AoJ/alex
    def synthesize(self, user_id, text, log="true"):
        if text == "_silence_" or text == "silence()":
            # just let the TTS generate an empty wav
            text == ""

        wav = []
        timestamp = datetime.now().strftime('%Y-%m-%d--%H-%M-%S.%f')
        fname = 'tts-{stamp}.wav'.format(stamp=timestamp)

        self.commands.send(Command('tts_start(user_id="%s",text="%s",fname="%s")' % (user_id, text, fname), 'TTS', 'HUB'))
        self.audio_out.send(Command('utterance_start(user_id="%s",text="%s",fname="%s",log="%s")' %
                            (user_id, text, fname, log), 'TTS', 'AudioOut'))

        segments = self.parse_into_segments(text)

        for i, segment_text in enumerate(segments):
            segment_wav = self.tts.synthesize(segment_text)
            segment_wav = self.remove_start_and_final_silence(segment_wav)
            if i < len(segments) - 1:
                # add silence only for non-final segments
                segment_wav += self.gen_silence()

            wav.append(segment_wav)

            segment_wav = various.split_to_bins(segment_wav, 2 * self.cfg['Audio']['samples_per_frame'])

            for frame in segment_wav:
                self.audio_out.send(Frame(frame))

        self.commands.send(Command('tts_end(user_id="%s",text="%s",fname="%s")' % (user_id, text, fname), 'TTS', 'HUB'))
        self.audio_out.send(Command('utterance_end(user_id="%s",text="%s",fname="%s",log="%s")' %
                            (user_id, text, fname, log), 'TTS', 'AudioOut'))
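Every example in this set pushes audio through various.split_to_bins before sending it frame by frame. The helper's body is not shown anywhere here; a minimal sketch consistent with how it is called (chunking a sequence into fixed-size bins, where 2 * samples_per_frame is the frame size in bytes for 16-bit samples) would be:

# A minimal sketch of split_to_bins, assuming it simply chunks a sequence;
# the real helper lives in alex's `various` module and may differ.
def split_to_bins(data, bin_size):
    """Split data (a byte string or list) into consecutive bins of
    bin_size items; the last bin may be shorter."""
    return [data[i:i + bin_size] for i in range(0, len(data), bin_size)]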
Example #2
    def send_wav(self, filename, stream=None):
        """Send given wavfile to the dialogue system as if it was said throught
        microphone."""
        # load wav
        wav = load_wav(self.cfg, filename)
        wav = various.split_to_bins(
            wav, 2 * self.cfg['Audio']['samples_per_frame'])

        # frame by frame send it
        for frame in wav:
            if stream is not None:
                stream.write(frame)
            self.audio_record.send(Frame(frame))

        # send some silence so that VAD recognizes end of recording
        for _ in range(10):
            self.audio_record.send(Frame(b"\x00\x00" * self.cfg['Audio']['samples_per_frame']))
Example #3
def play(cfg, wav):
    # open the audio device
    p = pyaudio.PyAudio()

    chunk = 160
    # open stream
    # NB: pyaudio.paInt32 == 2, so get_format_from_width receives a width of
    # 2 bytes and returns paInt16, i.e. 16-bit output -- matching the 16 kHz
    # mono 16-bit wavs used in the other examples.
    stream = p.open(format=p.get_format_from_width(pyaudio.paInt32),
                    channels=1,
                    rate=cfg['Audio']['sample_rate'],
                    output=True,
                    frames_per_buffer=chunk)

    wav = various.split_to_bins(wav, chunk)
    for w in wav:
        stream.write(w)

    stream.stop_stream()
    stream.close()
    p.terminate()
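A hypothetical caller, reusing the audio.load_wav helper and the config keys seen in the other examples (the import paths and concrete cfg values are assumptions):

# Hypothetical usage of play(); cfg mirrors the keys the function reads.
from alex.utils import audio, various    # assumed import paths

cfg = {'Audio': {'sample_rate': 16000, 'samples_per_frame': 256}}
wav = audio.load_wav(cfg, './resources/test16k-mono.wav')
play(cfg, wav)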
Example #4
File: tts.py Project: tkraut/alex
    def synthesize(self, user_id, text, log="true"):
        if text == "_silence_" or text == "silence()":
            # just let the TTS generate an empty wav
            text == ""

        wav = []
        timestamp = datetime.now().strftime('%Y-%m-%d--%H-%M-%S.%f')
        fname = 'tts-{stamp}.wav'.format(stamp=timestamp)

        self.commands.send(
            Command(
                'tts_start(user_id="%s",text="%s",fname="%s")' %
                (user_id, text, fname), 'TTS', 'HUB'))
        self.audio_out.send(
            Command(
                'utterance_start(user_id="%s",text="%s",fname="%s",log="%s")' %
                (user_id, text, fname, log), 'TTS', 'AudioOut'))

        segments = self.parse_into_segments(text)

        for i, segment_text in enumerate(segments):
            segment_wav = self.tts.synthesize(segment_text)
            segment_wav = self.remove_start_and_final_silence(segment_wav)
            if i < len(segments) - 1:
                # add silence only for non-final segments
                segment_wav += self.gen_silence()

            wav.append(segment_wav)

            segment_wav = various.split_to_bins(
                segment_wav, 2 * self.cfg['Audio']['samples_per_frame'])

            for frame in segment_wav:
                self.audio_out.send(Frame(frame))

        self.commands.send(
            Command(
                'tts_end(user_id="%s",text="%s",fname="%s")' %
                (user_id, text, fname), 'TTS', 'HUB'))
        self.audio_out.send(
            Command(
                'utterance_end(user_id="%s",text="%s",fname="%s",log="%s")' %
                (user_id, text, fname, log), 'TTS', 'AudioOut'))
Example #5
    parser.add_argument('-c', "--configs", nargs='+',
                        help='additional configuration files')
    args = parser.parse_args()

    cfg = Config.load_configs(args.configs)

    session_logger = cfg['Logging']['session_logger']
    system_logger = cfg['Logging']['system_logger']

    #########################################################################
    #########################################################################
    system_logger.info("Test of the AudioIO component\n" + "=" * 120)

    wav = audio.load_wav(cfg, './resources/test16k-mono.wav')
    # split audio into frames
    wav = various.split_to_bins(wav, 2 * cfg['Audio']['samples_per_frame'])
    # remove the last frame, which may be shorter than a full bin
    del wav[-1]

    aio_commands, aio_child_commands = multiprocessing.Pipe()  # used to send commands to AudioIO
    audio_record, child_audio_record = multiprocessing.Pipe()  # parent end: read recorded audio
    audio_play, child_audio_play = multiprocessing.Pipe()      # parent end: write audio to be played
    close_event = multiprocessing.Event()

    aio = AudioIO(cfg, aio_child_commands, child_audio_record, child_audio_play, close_event)

    aio.start()

    count = 0
    max_count = 2500
    while count < max_count:
        time.sleep(cfg['Hub']['main_loop_sleep_time'])
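The snippet breaks off inside the main loop. A sketch of what the loop body plausibly does with the pipes created above, feeding frames to AudioIO and draining whatever was recorded (the body is an assumption, not the project's code):

    # Hypothetical loop body: stream queued frames out, drain recordings.
    while count < max_count:
        time.sleep(cfg['Hub']['main_loop_sleep_time'])
        if wav:
            audio_play.send(Frame(wav.pop(0)))   # queue the next frame
        while audio_record.poll():
            audio_record.recv()                  # consume recorded audio
        count += 1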
Example #6
        'power_decision_non_speech_threshold': 0.2,
        },
        'Hub': {
            'main_loop_sleep_time': 0.005,
        },
        'Logging': {
            'output_dir': './tmp',
        },
    }

    print "Test of the AudioIO and VAD components:"
    print "=" * 120

    wav = audio.load_wav(cfg, './resources/test16k-mono.wav')
    # split audio into frames
    wav = various.split_to_bins(wav, 2 * cfg['Audio']['samples_per_frame'])
    # remove the last frame, which may be shorter than a full bin
    del wav[-1]

    aio_commands, aio_child_commands = multiprocessing.Pipe()  # used to send commands to AudioIO
    audio_record, child_audio_record = multiprocessing.Pipe()  # parent end: read recorded audio
    audio_play, child_audio_play = multiprocessing.Pipe()      # parent end: write audio to be played

    vad_commands, vad_child_commands = multiprocessing.Pipe()  # used to send commands to VAD
    vad_audio_out, vad_child_audio_out = multiprocessing.Pipe()  # used to read output audio from VAD

    close_event = multiprocessing.Event()

    aio = AudioIO(cfg, aio_child_commands, child_audio_record, child_audio_play, close_event)
    vad = VAD(cfg, vad_child_commands, audio_record, vad_child_audio_out, close_event)

    command_connections = [aio_commands, vad_commands]
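This snippet also stops before the main loop. By analogy with Example #5, a hedged sketch of how the wired pipes might be driven once both components are started (the loop is an assumption, not the project's code):

    # Hypothetical main loop: feed wav frames to AudioIO while the VAD,
    # which reads from audio_record, emits its output on vad_audio_out.
    aio.start()
    vad.start()
    while wav:
        time.sleep(cfg['Hub']['main_loop_sleep_time'])
        audio_play.send(Frame(wav.pop(0)))
        while vad_audio_out.poll():
            vad_audio_out.recv()                 # consume VAD output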
Example #7
def scores_equal_size_bins(wp_2_match):

    max_n = 100
    print "Split into equal size bins"
    wp_2_match_binned = split_to_bins(wp_2_match, len(wp_2_match) / max_n)
    # wp_2_match_binned[0][0][0] = 0.0

    # merge the same bins
    wp_2_match_binned_new = []
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]

        if wp_2_match_binned[-1][0][0] == min and wp_2_match_binned[-1][-1][0] == max:
            wp_2_match_binned_new[-1].extend(b)
        else:
            wp_2_match_binned_new.append(b)

    wp_2_match_binned = wp_2_match_binned_new

    x = []
    s = []
    i = -1
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]

        match = [wpm[1] for wpm in b]
        succ = sum(match) / len(match)
        # print "{min:.6e} -- {max:.6e} | {size} / {succ:.3f}".format(min=min, max=max, size=len(b), succ=succ)

        i += 1

        x.append(float(i))
        s.append(succ)

    xdata = [f for f in x]
    ydata = [f for f in s]

    sigma = [1.0 for f in x]
    sigma[-2] = 0.99
    sigma[-1] = 0.1

    popt, pcov = curve_fit(sig1,
                           xdata,
                           ydata,
                           sigma=sigma,
                           p0=[0.0, 1.0, 0.0, 0.0])

    print popt

    fitx = np.linspace(0, len(x), 50)
    fity = sig1(fitx, *popt)

    for xx, ss, f in zip(x, s, sig1(x, *popt)):
        print xx, ss, f

    f = P.figure()
    p = f.add_subplot(2, 1, 1)
    p.bar(x, s)
    # p = f.add_subplot(2,1,2)
    p.plot(fitx, fity)
    p.grid(True)

    P.savefig('kaldi_calibration_scores_equal_size_bins.pdf')

    print "Calibration table"

    cal_list = []
    last_f = 2.0
    last_min = 2.0
    for b, f in reversed(zip(wp_2_match_binned, sig1(x, *popt))):
        min = b[0][0]
        max = b[-1][0]

        if last_f - f > 0.02:
            cal_list.append((min, last_min, f))
            print min, f
            last_f = f
            last_min = min
    else:
        # for-else: runs once after the loop completes, closing the
        # calibration table down to score 0.0
        print 0.0, f
        cal_list.append((0.0, last_min, f))

    def find_approx(x):
        for i, (min, max, f) in enumerate(cal_list):
            if min <= x < max:
                return i, f

        print "ASR calibration warning: cannot map score."
        # fall back to the raw score with a sentinel index, so callers can
        # still unpack the (index, score) pair
        return -1, x

    count = defaultdict(int)

    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
        count[i] += 1
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match), time=e - s)

    pri_cal_list = []
    for i, x in enumerate(cal_list):
        pri_cal_list.append((count[i], x))

    pri_cal_list.sort()
    pri_cal_list.reverse()

    cal_list = [x[1] for x in pri_cal_list]
    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match), time=e - s)

    print "=" * 120
    print "The calibration table: insert it in the config"
    print "-" * 120
    print repr(cal_list)
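sig1 is never defined in these snippets. Given the four-element initial guess p0=[0.0, 1.0, 0.0, 0.0] handed to curve_fit, a plausible shape is a four-parameter logistic; the definition below is an assumption, not the project's actual function:

import numpy as np

# Assumed form of sig1: a shifted, scaled logistic with four free
# parameters, compatible with the p0=[0.0, 1.0, 0.0, 0.0] guess above.
def sig1(x, x0, k, ymin, ymax):
    x = np.asarray(x, dtype=float)
    return ymin + (ymax - ymin) / (1.0 + np.exp(-k * (x - x0)))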
Example #8
def scores_equal_size_bins(wp_2_match):

    max_n = 100
    print "Split into equal size bins"
    wp_2_match_binned = split_to_bins(wp_2_match, len(wp_2_match)/max_n)
    # wp_2_match_binned[0][0][0] = 0.0

    # merge the same bins
    wp_2_match_binned_new = []
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]

        if wp_2_match_binned[-1][0][0] == min and wp_2_match_binned[-1][-1][0] == max:
            wp_2_match_binned_new[-1].extend(b)
        else:
            wp_2_match_binned_new.append(b)

    wp_2_match_binned = wp_2_match_binned_new

    x = []
    s = []
    i = -1
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]

        match = [wpm[1] for wpm in b]
        succ = sum(match) / len(match)
        # print "{min:.6e} -- {max:.6e} | {size} / {succ:.3f}".format(min=min, max=max, size=len(b), succ=succ)

        i += 1

        x.append(float(i))
        s.append(succ)

    xdata = [f for f in x]
    ydata = [f for f in s]

    sigma = [1.0 for f in x]
    sigma[-2] = 0.99
    sigma[-1] = 0.1

    popt, pcov = curve_fit(sig1, xdata, ydata, sigma=sigma, p0=[0.0, 1.0, 0.0, 0.0])

    print popt

    fitx = np.linspace(0, len(x), 50)
    fity = sig1(fitx, *popt)

    for xx, ss, f in zip(x, s, sig1(x, *popt)):
        print xx, ss, f

    f = P.figure()
    p = f.add_subplot(2, 1, 1)
    p.bar(x, s)
    # p = f.add_subplot(2,1,2)
    p.plot(fitx, fity)
    p.grid(True)

    P.savefig('kaldi_calibration_scores_equal_size_bins.pdf')

    print "Calibration table"

    cal_list = []
    last_f = 2.0
    last_min = 2.0
    for b, f in reversed(zip(wp_2_match_binned, sig1(x, *popt))):
        min = b[0][0]
        max = b[-1][0]

        if last_f - f > 0.02:
            cal_list.append((min, last_min, f))
            print min, f
            last_f = f
            last_min = min
    else:
        # for-else: runs once after the loop completes, closing the
        # calibration table down to score 0.0
        print 0.0, f
        cal_list.append((0.0, last_min, f))

    def find_approx(x):
        for i, (min, max, f) in enumerate(cal_list):
            if min <= x < max:
                return i, f

        print "ASR calibration warning: cannot map score."
        # fall back to the raw score with a sentinel index, so callers can
        # still unpack the (index, score) pair
        return -1, x

    count = defaultdict(int)

    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
        count[i] += 1
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match), time = e - s)

    pri_cal_list = []
    for i, x in enumerate(cal_list):
        pri_cal_list.append((count[i], x))

    pri_cal_list.sort()
    pri_cal_list.reverse()

    cal_list = [x[1] for x in pri_cal_list]
    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match), time = e - s)

    print "="*120
    print "The calibration table: insert it in the config"
    print "-"*120
    print repr(cal_list)