Exemplo n.º 1
0
def test_iterator():
    """Check the chunks produced for a valid and for a truncated MP3 file.

    Every chunk must be a ``bytes`` object whose length is a multiple of
    4 (presumably two 16-bit channels per sample -- confirm against the
    fixture files).
    """
    # valid file: consume the decoder through the iterator protocol.
    # The file is opened in a ``with`` block so the handle is closed even
    # when an assertion fails.
    with open('tests/streamp3/data/stereo.mp3', 'rb') as stream:
        for chunk in MP3Decoder(stream):
            assert isinstance(chunk, bytes)
            assert len(chunk) % 4 == 0

    # truncated file: keep calling read() until it returns a falsy value
    with open('tests/streamp3/data/truncated.mp3', 'rb') as stream:
        decoder = MP3Decoder(stream)
        while True:
            chunk = decoder.read()
            if not chunk:
                break
            assert isinstance(chunk, bytes)
            assert len(chunk) % 4 == 0
Exemplo n.º 2
0
    def synthesize(
        self,
        utterance: str,
        mode: str = "text",
        voice: str = "demo-male",
        profile: str = "default",
    ) -> None:
        """Render an utterance as speech and write the audio to the output.

        The utterance may be plain text (`mode="text"`), SSML
        (`mode="ssml"`), or Speech Markdown (`mode="markdown"`).

        The `profile` argument is forwarded to the client together with
        the other arguments. How the returned stream is handled depends
        on this object's configured format: MP3 data is decoded before
        being written, while 16-bit PCM data is written as raw bytes.
        Any other format results in nothing being written.

        Args:
            utterance (str): string that needs to be rendered as speech.
            mode (str): synthesis mode to use with utterance. text, ssml,
                        markdown, etc.
            voice (str): name of the tts voice.
            profile (str): name of the audio profile used to create the
                           resulting stream.

        """
        audio = self._client.synthesize(utterance, mode, voice, profile)

        if self._format == FORMAT_MP3:
            # the response is a sequence of MP3 frames: wrap it in a
            # SequenceIO for the decoder and write out each decoded chunk
            for decoded in MP3Decoder(SequenceIO(audio)):
                self._output.write(decoded)
            return

        if self._format == FORMAT_PCM16:
            # raw audio: each item only needs converting to bytes
            for samples in audio:
                self._output.write(samples.tobytes())
Exemplo n.º 3
0
def test_read():
    """Check the chunks returned by ``read()`` for stereo and mono files.

    ``read()`` is called until it returns a falsy value. Every chunk must
    be ``bytes``; stereo chunk lengths must be a multiple of 4 and mono
    chunk lengths a multiple of 2.
    """
    # stereo read; the ``with`` block guarantees the file handle is
    # closed even when an assertion fails
    with open('tests/streamp3/data/stereo.mp3', 'rb') as stream:
        decoder = MP3Decoder(stream)
        while True:
            chunk = decoder.read()
            if not chunk:
                break
            assert isinstance(chunk, bytes)
            assert len(chunk) % 4 == 0

    # mono read
    with open('tests/streamp3/data/mono.mp3', 'rb') as stream:
        decoder = MP3Decoder(stream)
        while True:
            chunk = decoder.read()
            if not chunk:
                break
            assert isinstance(chunk, bytes)
            assert len(chunk) % 2 == 0
Exemplo n.º 4
0
def test_properties():
    """Check ``bit_rate``, ``sample_rate`` and ``num_channels`` per fixture.

    Each file is opened in a ``with`` block so the handle is released
    once the decoder's properties have been checked.
    """
    # constant bit rate
    with open('tests/streamp3/data/cbr.mp3', 'rb') as stream:
        decoder = MP3Decoder(stream)
        assert decoder.bit_rate == 128000
        assert decoder.sample_rate == 44100
        assert decoder.num_channels == 2

    # variable bit rate: the reported bit rate is expected to change
    # after a read()
    with open('tests/streamp3/data/vbr.mp3', 'rb') as stream:
        decoder = MP3Decoder(stream)
        assert decoder.bit_rate == 128000
        decoder.read()
        assert decoder.bit_rate == 32000
        assert decoder.sample_rate == 44100
        assert decoder.num_channels == 2

    # mono
    with open('tests/streamp3/data/mono.mp3', 'rb') as stream:
        decoder = MP3Decoder(stream)
        assert decoder.bit_rate == 32000
        assert decoder.sample_rate == 16000
        assert decoder.num_channels == 1
Exemplo n.º 5
0
def test_iterator_with_copy():
    """Check that ``provide_copy=True`` also yields the raw input data.

    With ``provide_copy=True`` the decoder is iterated as
    ``(chunk, raw)`` pairs. The concatenation of all ``raw`` parts must
    equal the content of the source file byte for byte.
    """
    path = 'tests/streamp3/data/stereo.mp3'

    # collect the raw parts in a list and join once at the end; repeated
    # ``bytes +=`` would copy the accumulated data on every iteration
    raw_parts = []
    with open(path, 'rb') as stream:
        for chunk, raw in MP3Decoder(stream, provide_copy=True):
            raw_parts.append(raw)
            assert isinstance(chunk, bytes)
            assert len(chunk) % 4 == 0
    raw_data = b''.join(raw_parts)

    with open(path, 'rb') as stream:
        ref_data = stream.read()
    assert len(raw_data) == len(ref_data)
    assert raw_data == ref_data
Exemplo n.º 6
0
    def synthesize(self, utterance: str, mode: str, voice: str) -> None:
        """Render an utterance as speech and write the decoded audio out.

        The stream returned by the client is wrapped in a SequenceIO,
        decoded with MP3Decoder, and every decoded chunk is written to
        this object's output.

        Args:
            utterance (str): string that needs to be rendered as speech.
            mode (str): synthesis mode to use with utterance. text, ssml,
                        markdown, etc.
            voice (str): name of the tts voice.

        """
        response = self._client.synthesize(utterance, mode, voice)
        decoder = MP3Decoder(SequenceIO(response))
        for decoded in decoder:
            self._output.write(decoded)
Exemplo n.º 7
0
def test_construction():
    """Check which inputs ``MP3Decoder`` accepts at construction time.

    Invalid inputs (empty stream, empty bytes, garbage bytes, a file
    named ``id3only.mp3``, and four ``0xFF`` bytes) must raise; valid
    inputs given either as ``bytes`` or as an open binary file must not.
    """
    # invalid stream
    with pytest.raises(Exception):
        MP3Decoder(BytesIO(b''))
    with pytest.raises(Exception):
        MP3Decoder(b'')
    with pytest.raises(Exception):
        MP3Decoder(b'invalid')
    # open the fixture OUTSIDE pytest.raises: otherwise a missing file
    # (FileNotFoundError) would satisfy the expectation and hide the
    # fact that the decoder was never exercised
    with open('tests/streamp3/data/id3only.mp3', 'rb') as stream:
        with pytest.raises(Exception):
            MP3Decoder(stream)
    with pytest.raises(Exception):
        MP3Decoder(bytes([0xFF, 0xFF, 0xFF, 0xFF]))

    # valid bytes
    with open('tests/streamp3/data/noid3.mp3', 'rb') as stream:
        MP3Decoder(stream.read())

    # valid stream, no ID3
    with open('tests/streamp3/data/noid3.mp3', 'rb') as stream:
        MP3Decoder(stream)

    # valid stream with ID3
    with open('tests/streamp3/data/withid3.mp3', 'rb') as stream:
        MP3Decoder(stream)
Exemplo n.º 8
0
def construct_pcm(audio):
    """Construct PCM data, appropriately spaced in time, given the provided
    audio object.

    The MP3 bytes are scanned for frame headers, decoded with MP3Decoder,
    and the decoded chunks are laid out on a timeline: whenever a chunk's
    recorded time is later than the end of the audio emitted so far, the
    gap is filled with zero-valued samples.

    Args:
        audio: A dictionary with fields 'data' (mp3 data received, as a
               list of byte values), 'data2time' (which maps the time of
               message receipt and data messages to each other; column 0
               is compared against header offsets and column 1 against
               the running end time) and 'start' (the time at which the
               output timeline begins).

    Returns:
        A list of Python ints, one per 16-bit sample, obtained by reading
        the assembled PCM bytes as little-endian signed 16-bit values.

    Raises:
        RuntimeError: if some offset in column 0 of 'data2time' is not
            the position of a detected header.

    """
    # Find candidate headers. MP3 has no reserved sync word, so to be
    # sure that a header is valid one would have to check the rest of the
    # frame; here only the four header bytes are matched:
    # byte0 == 255, byte1 == 243, upper nibble of byte2 != 0xF,
    # (byte2 & 0x0C) == 0x08 and byte3 == 196.
    data_arr = np.array(audio['data'])
    headers = np.where((data_arr[0:-3] == 255) *
                       (data_arr[1:-2] == 243) *
                       (np.right_shift(data_arr[2:-1], 4) != 0x0F) *
                       (np.bitwise_and(data_arr[2:-1], 0x0C) == 0x08) *
                       (data_arr[3:] == 196))[0]
    # NOTE(review): `& 0b10` yields 0 or 2, not 0 or 1 -- confirm whether
    # a single padding byte was intended here.
    padding = (data_arr[headers+2] & 0b10)
    data2time = np.array(audio['data2time'])
    if not np.isin(data2time[:, 0], headers).all():
        raise RuntimeError(
            'Some messages did not start with well formed headers')
        # TODO: handle messed up messages gracefully
        # TODO: check frame lengths
    frame_size = ((576 / 8 * (BITRATES[data_arr[headers+2] >> 4]))/16)+padding
    # always 576 samples/frame for V2 Layer III stereo and 1152 for mono?

    # PyDub
    # For whatever reason FFMPEG expects frames of framesize + header size.
    # I think it should just be frame size, I could pad it or something, but
    # that would change the underlying compression output:
    # ipdb> AudioSegment.from_file(io.BytesIO(bytes(audio['data'][0:432+4])))

    # streamp3
    # creates chunks of 16 bit PCM, but ends up missing two chunks?
    # Trying to read with only the first frame or only the last frame
    # just gives nothing back, so a synthetic frame (the first header
    # followed by zeros up to the first frame's size) is placed before and
    # after the data. Here is an idea as to why that might be needed:
    # https://thebreakfastpost.com/2016/11/26/
    # mp3-decoding-with-the-mad-library-weve-all-been-doing-it-wrong/
    first_header = headers[0]
    pad_frame = (audio['data'][first_header:first_header+4] +
                 [0]*int(frame_size[0]-4))
    mp3_decoder = MP3Decoder(bytes(pad_frame+audio['data']+pad_frame))
    all_chunks = list(mp3_decoder)
    # the first chunk is just the leading pad frame coming back; drop it
    all_chunks = all_chunks[1:]

    assert mp3_decoder.sample_rate == 16000
    assert mp3_decoder.num_channels == 1
    LOGGER.info(
        'bit rate: %i, sample rate: %i, num channels: %i',
        mp3_decoder.bit_rate, mp3_decoder.sample_rate, mp3_decoder.num_channels)
    LOGGER.info(
        '%i valid headers found, but LAME only reads %i chunks', len(headers), len(all_chunks))

    # Accumulate raw PCM bytes in a bytearray instead of a Python list of
    # ints: same content, far less memory and no per-byte boxing.
    filled_data = bytearray()
    end_time = audio['start']
    for frame_idx, this_chunk in enumerate(all_chunks):
        receipt_time = audio['data2time'][frame_idx][1]
        diff = receipt_time - end_time
        if diff > 0:
            # fill the gap with silence: two zero bytes per 16-bit sample
            add_samples = int(diff*16000)
            end_time += add_samples/16000
            filled_data.extend(bytes(2*add_samples))

        filled_data.extend(this_chunk)
        # each chunk advances the timeline by 576 samples at 16 kHz
        end_time += 576/16000

    # Reinterpret the byte pairs as little-endian signed 16-bit samples;
    # a trailing odd byte, if any, is ignored.
    num_samples = len(filled_data) // 2
    if not num_samples:
        return []
    return np.frombuffer(
        filled_data, dtype='<i2', count=num_samples).tolist()