def convert(inputstreamfile, outputstream, format,codec, channel_layout, rate):
    """Re-encode the first audio stream of an input into ``outputstream``.

    Every decoded frame is resampled to the packed variant of ``format`` with
    the requested ``channel_layout`` and ``rate``, encoded with ``codec`` and
    muxed into the output container.

    Args:
        inputstreamfile: Anything ``av.open`` accepts for reading.
        outputstream: Anything ``av.open`` accepts for writing.
        format: Sample format name understood by ``av.AudioFormat``.
        codec: Encoder name passed to ``add_stream``.
        channel_layout: Channel layout handed to the resampler.
        rate: Output sample rate, used for both the stream and the resampler.

    Returns:
        None. Failures are not raised: the exception type, the file name and
        the line number are printed instead (best-effort behaviour kept from
        the original implementation).
    """
    try:
        inp = None
        out = None
        try:
            # set input/output locations
            inp = av.open(inputstreamfile)
            out = av.open(outputstream, 'w')
            out_stream = out.add_stream(codec_name=codec, rate=rate)

            # resampler object details how we want to change frame information
            resampler = av.AudioResampler(
                format=av.AudioFormat(format).packed,
                layout=channel_layout,
                rate=rate
            )

            # decode frames and start re-encoding into new file
            for frame in inp.decode(audio=0):
                # pts is the presentation time-stamp; it is cleared so the
                # resampler does not reject frames with unexpected timestamps.
                frame.pts = None

                frame = resampler.resample(frame)

                for p in out_stream.encode(frame):
                    out.mux(p)

            # Flush the encoder: it may still hold buffered samples that would
            # otherwise never reach the output.
            for p in out_stream.encode(None):
                out.mux(p)
        finally:
            # Always release both containers, even when transcoding failed.
            # Errors raised while closing are reported by the handler below.
            try:
                if out is not None:
                    out.close()
            finally:
                if inp is not None:
                    inp.close()

    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
    def _frame_from_ndarray(array, channels, format):
        """
        Build an ``av.AudioFrame`` whose planes are filled from a 2-D numpy array.

        ``channels`` selects the layout name from
        ``PyAVCodec._channel_layout_names`` and ``format`` selects the expected
        numpy dtype from ``PyAVCodec._format_dtypes``.  Planar formats expect
        one row per channel; packed formats expect a single row holding all
        channels.

        Raises ``ValueError`` when ``format`` has no dtype mapping; shape and
        dtype mismatches are reported through assertions.
        """
        dtype_by_format = PyAVCodec._format_dtypes
        layout_name = PyAVCodec._channel_layout_names[channels]

        # Translate the avcodec sample format into the numpy dtype it implies.
        try:
            expected_dtype = np.dtype(dtype_by_format[format])
        except KeyError:
            raise ValueError(
                'Conversion from numpy array with format `%s` is not yet supported'
                % format)

        assert array.dtype == expected_dtype
        assert array.ndim == 2

        planar = av.AudioFormat(format).is_planar
        if not planar:
            # Packed: one row holding the samples of every channel.
            assert array.shape[0] == 1
            sample_count = array.shape[1] // channels
        else:
            # Planar: one row per channel, one column per sample.
            assert array.shape[
                0] == channels, f"array.shape={array.shape}, nb_channels={channels}"
            sample_count = array.shape[1]

        new_frame = av.AudioFrame(
            format=format, layout=layout_name, samples=sample_count)
        for row, plane in enumerate(new_frame.planes):
            plane.update(array[row, :])
        return new_frame
示例#3
0
    def run(self):
        """Decode every entry of ``self.input`` and queue raw frame bytes.

        For each decoded frame a two-element list ``[video, audio]`` is put on
        ``self.fifo``: video frames are reformatted to ``self.w`` x ``self.h``
        'rgb24' and audio frames are resampled ('s16', layout 2, 44100 Hz);
        the slot that does not apply stays ``None``.
        """
        for source in self.input:
            container = av.open(source, 'r')

            # A new resampler is created for every input.
            resampler = av.AudioResampler(
                format=av.AudioFormat('s16'), layout=2, rate=44100)

            for packet in container.demux():
                for frame in packet.decode():
                    stream_kind = packet.stream.type
                    video_frame = audio_frame = None

                    if stream_kind == 'video':
                        rgb_frame = frame.reformat(self.w, self.h, 'rgb24')
                        rgb_frame.pts = None
                        video_frame = rgb_frame.planes[0].to_bytes()

                    if stream_kind == 'audio':
                        # pts is cleared before the frame enters the resampler.
                        frame.pts = None
                        resampled = resampler.resample(frame)
                        audio_frame = resampled.planes[0].to_bytes()

                    self.fifo.put([video_frame, audio_frame])
示例#4
0
    def transcode(self, in_frame: np.ndarray,
                  time_info: TimeInfo) -> T.Tuple[av.AudioFrame, float]:
        """Wrap one raw input buffer into a timestamped ``av.AudioFrame``.

        Args:
            in_frame: Raw sample buffer, interpreted with ``self.dtype``.
                Assumed to be channel-interleaved (the usual PyAudio layout)
                -- confirm against the caller.
            time_info: Timing record; only ``input_buffer_adc_time`` is read.

        Returns:
            ``(frame, time_info.input_buffer_adc_time)``.  The frame uses
            ``self.pyav_format`` / ``self.pyav_layout``, and its pts is
            ``samples * self.num_encoded_frames``, which is then incremented.

        Raises:
            AssertionError: if the buffer does not hold a whole number of
                samples per channel.
        """
        # Step 1: Decode PyAudio input frame

        interleaved = np.frombuffer(in_frame, dtype=self.dtype)

        chunk_length, leftover = divmod(interleaved.size, self.channels)
        assert leftover == 0, (
            "buffer size %d is not a multiple of %d channels"
            % (interleaved.size, self.channels))

        # Step 2: Shape the samples the way the target format stores them

        if av.AudioFormat(self.pyav_format).is_planar:
            # Planar formats want one contiguous row per channel, so the
            # interleaved (samples, channels) view is transposed and copied.
            # The previous single-row reshape could never satisfy this for
            # more than one channel.
            planes = np.ascontiguousarray(
                interleaved.reshape(chunk_length, self.channels).T)
        else:
            # Packed formats keep every channel interleaved in one plane.
            planes = interleaved.reshape(1, -1)

        out_frame = av.AudioFrame(format=self.pyav_format,
                                  layout=self.pyav_layout,
                                  samples=chunk_length)

        for i, plane in enumerate(out_frame.planes):
            plane.update(planes[i, :])

        out_frame.rate = int(self.frame_rate)
        out_frame.time_base = Fraction(1, int(self.frame_rate))
        # Running sample position; exact only while every chunk has the same
        # number of samples.
        out_frame.pts = out_frame.samples * self.num_encoded_frames
        self.num_encoded_frames += 1

        return out_frame, time_info.input_buffer_adc_time
示例#5
0
文件: media.py 项目: killinit/aiortc
def player_worker(loop, container, audio_track, video_track, quit_event, throttle_playback):
    """Decode ``container`` and hand frames to the track queues on ``loop``.

    Runs until ``quit_event`` is set or decoding ends.  Audio frames are
    converted to 's16' at 48 kHz when needed, re-timestamped, and re-chunked
    through an ``av.AudioFifo`` into frames of
    ``int(48000 * AUDIO_PTIME)`` samples.  Video frames have the first seen
    pts subtracted so they start at zero.  When decoding stops, ``None`` is
    queued on each track that was supplied.

    Args:
        loop: Event loop on which the queue ``put`` coroutines are scheduled.
        container: Opened PyAV container to decode.
        audio_track: Object with a ``_queue`` for audio frames, or falsy.
        video_track: Object with a ``_queue`` for video frames, or falsy.
        quit_event: Object whose ``is_set()`` stops the loop.
        throttle_playback: When true, sleep briefly once decoding runs more
            than one second ahead of wall-clock time.
    """
    audio_fifo = av.AudioFifo()
    audio_format = av.AudioFormat('s16')
    audio_sample_rate = 48000
    audio_samples = 0
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(
        format=audio_format,
        rate=audio_sample_rate)

    video_first_pts = None

    frame_time = None
    start_time = time.time()

    # Create the decoding generator once.  Building a fresh one for every
    # frame abandons the previous generator after its first result, so any
    # further frames decoded from the same packet were silently dropped.
    frames = container.decode()

    while not quit_event.is_set():
        try:
            frame = next(frames)
        except (av.AVError, StopIteration):
            # End of stream or decode failure: signal it with a None sentinel.
            if audio_track:
                asyncio.run_coroutine_threadsafe(audio_track._queue.put(None), loop)
            if video_track:
                asyncio.run_coroutine_threadsafe(video_track._queue.put(None), loop)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed_time = (time.time() - start_time)
            if frame_time and frame_time > elapsed_time + 1:
                time.sleep(0.1)

        if isinstance(frame, AudioFrame) and audio_track:
            if frame.format != audio_format or frame.sample_rate != audio_sample_rate:
                # pts is cleared before the frame enters the resampler.
                frame.pts = None
                frame = audio_resampler.resample(frame)

            # fix timestamps: count samples at the output rate from zero
            frame.pts = audio_samples
            frame.time_base = fractions.Fraction(1, audio_sample_rate)
            audio_samples += frame.samples

            # Re-chunk into fixed-size frames; read() yields a falsy value
            # once fewer than audio_samples_per_frame samples remain.
            audio_fifo.write(frame)
            while True:
                chunk = audio_fifo.read(audio_samples_per_frame)
                if not chunk:
                    break
                frame_time = chunk.time
                asyncio.run_coroutine_threadsafe(audio_track._queue.put(chunk), loop)
        elif isinstance(frame, VideoFrame) and video_track:
            # video from a webcam doesn't start at pts 0, cancel out offset
            if frame.pts is not None:
                if video_first_pts is None:
                    video_first_pts = frame.pts
                frame.pts -= video_first_pts

            frame_time = frame.time
            asyncio.run_coroutine_threadsafe(video_track._queue.put(frame), loop)
示例#6
0
    def open(self, path, mono=False, sample_rate=None):
        """Open the audio resource at ``path`` and prepare lazy decoding.

        Stores the container, its first audio stream, a generator of resampled
        frames (``self.decode_iter``), the duration in whole seconds (or
        ``None`` when the stream reports none), the channel count and the
        output sample rate on ``self``.

        ``mono`` forces a mono layout; ``sample_rate`` overrides the stream's
        rate (44100 is used when neither is available).
        """
        self.path = path
        self.open_kargs = {'mono': mono, 'sample_rate': sample_rate}

        # 'usetoc' is set to enable fast seek (see also ffmpeg commit c43bd08
        # for a 'fastseek' option); the two timeouts bound I/O operations and
        # are given in µs and ms respectively.
        av_options = {'usetoc': '1',
                      'timeout': '5000000', 'listen_timeout': '5000'}
        container = av.open(path, options=av_options)
        self.container = container
        log.debug('container: %s', container)

        stream = next(s for s in container.streams if s.type == 'audio')
        self.stream = stream
        log.debug('stream: %s', stream)

        target_layout = 'mono' if mono else stream.layout
        target_rate = sample_rate or stream.rate or 44100
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16').packed,
            layout=target_layout,
            rate=target_rate)

        def decode_iter():
            """Generator reading, decoding and resampling the audio stream."""
            for packet in container.demux(stream):
                for decoded in packet.decode():
                    # Remember the original pts, then clear it: the frame pts
                    # must be None before resampling
                    # (see https://github.com/mikeboers/PyAV/issues/281)
                    self.last_frame_pts = decoded.pts
                    decoded.pts = None
                    yield resampler.resample(decoded)

        self.decode_iter = decode_iter()
        self.pos = 0

        # Duration in seconds
        if not stream.duration:
            # It is certainly a web file
            log.info("No duration")
            self.duration = None
        else:
            self.duration = int(stream.duration * stream.time_base)

        self.num_channels = 1 if mono else stream.channels
        self.sample_rate = resampler.rate
示例#7
0
    def transcode(self, in_frame: np.ndarray,
                  time_info: TimeInfo) -> T.Tuple[av.AudioFrame, float]:
        """Wrap one raw interleaved input buffer into an ``av.AudioFrame``.

        Args:
            in_frame: Raw sample buffer with channels interleaved, interpreted
                with ``self.dtype``.
            time_info: Timing record; only ``input_buffer_adc_time`` is read.

        Returns:
            ``(frame, time_info.input_buffer_adc_time)``.  The frame uses
            ``self.pyav_format`` / ``self.pyav_layout``, and its pts is
            ``samples * self.num_encoded_frames``, which is then incremented.

        Raises:
            AssertionError: if the buffer does not hold a whole number of
                samples per channel.
        """
        # Step 1: Decode PyAudio input frame

        # frombuffer replaces the deprecated binary mode of np.fromstring and
        # avoids an extra copy of the input.
        interleaved = np.frombuffer(in_frame, dtype=self.dtype)

        chunk_length, leftover = divmod(interleaved.size, self.channels)
        assert leftover == 0, (
            "buffer size %d is not a multiple of %d channels"
            % (interleaved.size, self.channels))

        # Step 2: Encode PyAV output frame

        if av.AudioFormat(self.pyav_format).is_planar:
            # One contiguous row per channel: transpose the interleaved
            # (samples, channels) view and copy it.  This is the same result
            # as the former column-major flatten followed by a reshape.
            planes = np.ascontiguousarray(
                interleaved.reshape(chunk_length, self.channels).T)
        else:
            # Packed formats keep every channel interleaved in one plane.
            # Previously this branch could only pass its assertion for a
            # single channel.
            planes = interleaved.reshape(1, -1)

        out_frame = av.AudioFrame(format=self.pyav_format,
                                  layout=self.pyav_layout,
                                  samples=chunk_length)

        for i, plane in enumerate(out_frame.planes):
            plane.update(planes[i, :])

        out_frame.rate = int(self.frame_rate)
        out_frame.time_base = Fraction(1, int(self.frame_rate))
        # Running sample position; exact only while every chunk has the same
        # number of samples.
        out_frame.pts = out_frame.samples * self.num_encoded_frames
        self.num_encoded_frames += 1

        return out_frame, time_info.input_buffer_adc_time
示例#8
0
    def __init__(self,
                 path,
                 output_chunk_size,
                 output_rate,
                 realtime=True,
                 time_limit=None,
                 output_format='s16',
                 output_layout='mono'):
        """
        Set up frame iteration over ``path`` plus a resampler producing the
        requested output; only 's16' / 'mono' output is accepted, anything
        else raises ``NotImplementedError``.

        :type path: str
        :type output_chunk_size: int
        :type output_rate: int
        :type realtime: bool
        :type time_limit: float
        """
        if output_format != 's16':
            raise NotImplementedError(
                'output_format {} is not supported.'.format(output_format))
        if output_layout != 'mono':
            raise NotImplementedError(
                'output_layout {} is not supported.'.format(output_layout))

        # With mono 's16' output enforced above, every sample is 16 bits.
        bits_per_second = output_rate * 16

        self._realtime = realtime
        self._chunk_size = output_chunk_size
        self._time_limit = time_limit
        self._bit_rate = bits_per_second
        # chunk size * 8 bits / bit rate; presumably the chunk size is in
        # bytes, giving a duration in seconds -- TODO confirm.
        self._chunk_duration = output_chunk_size * 8 / bits_per_second

        self._afi = AudioFrameIterable(path)
        packed_format = av.AudioFormat(output_format).packed
        self._resampler = av.AudioResampler(
            format=packed_format, layout=output_layout, rate=output_rate)

        # Streaming state, all starting empty / at zero.
        self._buffer = b''
        self._timestamp = 0
        self._duration_processed = 0
示例#9
0
    def demultiplexer(self, container):
        """Demux ``container`` and queue ``[video_frame, audio_frame]`` pairs.

        Video frames get a synthetic pts (``self.new_vid_pts``, advanced by
        512 per frame) and are reformatted to ``self.w`` x ``self.h``
        'yuv420p'.  Audio frames are resampled to 's16', layout 2, at
        ``self.audio_rate``.  The slot that does not apply stays ``None``.
        """
        # resample audio line to the given format
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16'), layout=2, rate=self.audio_rate)

        # loop over the container
        for packet in container.demux():
            stream_kind = packet.stream.type

            for frame in packet.decode():
                video_frame = audio_frame = None

                if stream_kind == 'video':
                    # Replace the source pts with our own running counter
                    # before converting the pixel format and size.
                    frame.pts = self.new_vid_pts
                    video_frame = frame.reformat(self.w, self.h, 'yuv420p')
                    self.new_vid_pts += 512

                if stream_kind == 'audio':
                    # pts is cleared before the frame enters the resampler.
                    frame.pts = None
                    audio_frame = resampler.resample(frame)

                # push to fifo buffer
                self.fifo.put([video_frame, audio_frame])
示例#10
0
from qtproxy import Q

import av


# Command line: one positional argument, the path handed to av.open below.
parser = argparse.ArgumentParser()
parser.add_argument('path')
args = parser.parse_args()

# Open the media and pick its first audio stream; next() raises StopIteration
# when the container has no audio stream.
container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

# Audio FIFO plus a resampler targeting packed signed 16-bit stereo at 48000 Hz.
fifo = av.AudioFifo()
resampler = av.AudioResampler(
    format=av.AudioFormat('s16').packed,
    layout='stereo',
    rate=48000,
)



# Output format description built through the Q proxy (presumably Qt's
# QAudioFormat -- confirm in qtproxy). The values mirror the resampler target:
# little-endian signed 16-bit PCM, 2 channels, 48000 Hz.
qformat = Q.AudioFormat()
qformat.setByteOrder(Q.AudioFormat.LittleEndian)
qformat.setChannelCount(2)
qformat.setCodec('audio/pcm')
qformat.setSampleRate(48000)
qformat.setSampleSize(16)
qformat.setSampleType(Q.AudioFormat.SignedInt)

# Audio output object configured with that format.
output = Q.AudioOutput(qformat)
示例#11
0
    def _do_run(self) -> None:
        """Decode loop feeding resampled audio into ``self.Source.AudioFifo``.

        Opens the source container if it is not open yet, then, until
        ``self.Source._end`` is set or the frame generator is exhausted,
        pulls one decoded frame at a time, optionally runs it through the
        filter graph, resamples it to packed s16 stereo at 48000 Hz and
        writes it to the FIFO.  The whole loop runs while holding
        ``self.Source._loading`` via ``withLock`` (defined elsewhere).
        """
        with withLock(self.Source._loading):
            # Lazily open the container on first use.
            if not self.Source.Container:
                self.Source.Container = av.open(
                    self.Source.Source, options=self.Source.AVOption
                )
            # Container.duration divided by 1e6 and rounded to 2 decimals;
            # presumably microseconds -> seconds (TODO confirm unit).
            self.Source.duration = round(self.Source.Container.duration / 1000000, 2)

            # Decode only the first audio stream of the container.
            self.Source.selectAudioStream = self.Source.Container.streams.audio[0]
            self.Source.FrameGenerator = self.Source.Container.decode(
                self.Source.selectAudioStream
            )

            while not self.Source._end.is_set():
                # Rebuild (or drop) the filter graph whenever the source's
                # filter setting differs from the one last applied here.
                if self.Source.filter != self.Filter:
                    self.Filter = self.Source.filter

                    if self.Source.filter:
                        self.FilterGraph = AudioFilter()
                        self.FilterGraph.selectAudioStream = (
                            self.Source.selectAudioStream
                        )
                        self.FilterGraph.setFilters(self.Filter)
                    else:
                        self.FilterGraph = None

                # (Re)create the resampler on first use or when a reload was
                # requested through the _haveToReloadResampler event.
                if not self.Resampler or self.Source._haveToReloadResampler.is_set():
                    self.Resampler = av.AudioResampler(
                        format=av.AudioFormat("s16").packed, layout="stereo", rate=48000
                    )
                    self.Source._haveToReloadResampler.clear()

                # If the _seeking lock is currently held (presumably by a seek
                # in progress -- confirm with the seeking code), block until
                # it can be acquired so the next frame is read afterwards.
                _seek_locked = False
                if self.Source._seeking.locked():
                    self.Source._seeking.acquire()
                    _seek_locked = True

                # None marks an exhausted generator.
                Frame = next(self.Source.FrameGenerator, None)

                # After waiting on a seek: release the lock and reset the
                # FIFO contents.
                if _seek_locked:
                    self.Source._seeking.release()
                    self.Source.AudioFifo.reset()

                # No more frames: stop the source and leave the loop.
                if not Frame:
                    self.Source.stop()
                    break

                # Position of this frame (pts * time_base), captured before
                # the pts is cleared below.
                _current_position = float(Frame.pts * Frame.time_base)

                # Optional filtering; when the graph yields no frame on this
                # pull, move on to the next input frame.
                if self.FilterGraph:
                    self.FilterGraph.push(Frame)
                    Frame = self.FilterGraph.pull()

                    if not Frame:
                        continue

                # pts is cleared before resampling.  A ValueError from the
                # resampler schedules a resampler rebuild and skips this frame.
                Frame.pts = None
                try:
                    Frame = self.Resampler.resample(Frame)
                except ValueError:
                    self.Source._haveToReloadResampler.set()
                    continue

                # Block until the FIFO signals (haveToFillBuffer) that it
                # wants more data.
                if not self.Source.AudioFifo.haveToFillBuffer.is_set():
                    self.Source.AudioFifo.haveToFillBuffer.wait()

                self.Source.AudioFifo.write(Frame)
                self.Source._position = _current_position

                # Release _waitforread if it is held (presumably waking a
                # reader waiting for data -- confirm with the reader side).
                if self.Source._waitforread.locked():
                    self.Source._waitforread.release()
示例#12
0
# Remaining command-line options (arg_parser itself and the 'path' argument
# read below are defined outside this excerpt).  --format, --layout and --rate
# configure the resampler, and --size decides whether a FIFO is created; the
# uses of --data and --count are not visible in this excerpt.
arg_parser.add_argument('-d', '--data', action='store_true')
arg_parser.add_argument('-f', '--format')
arg_parser.add_argument('-l', '--layout')
arg_parser.add_argument('-r', '--rate', type=int)
arg_parser.add_argument('-s', '--size', type=int, default=1024)
arg_parser.add_argument('-c', '--count', type=int, default=5)
args = arg_parser.parse_args()

ffplay = None

# Open the media and pick its first audio stream; next() raises StopIteration
# when the container has no audio stream.
container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

# A FIFO is created only for a non-zero --size.
fifo = av.AudioFifo() if args.size else None
# A resampler is built only when at least one of --format/--layout/--rate was
# given.  The format is the packed variant of --format (None when omitted);
# an all-digit --layout is converted to int, otherwise passed through as given.
resampler = av.AudioResampler(
    format=av.AudioFormat(args.format or stream.format.name).packed
    if args.format else None,
    layout=int(args.layout)
    if args.layout and args.layout.isdigit() else args.layout,
    rate=args.rate,
) if (args.format or args.layout or args.rate) else None

# Running counters; only read_count is updated within this excerpt.
read_count = 0
fifo_count = 0
sample_count = 0

# Demux the selected audio stream and count every decoded frame.
for i, packet in enumerate(container.demux(stream)):

    for frame in packet.decode():

        read_count += 1
示例#13
0
 def reloadResampler(self):
     """Replace ``self.Resampler`` with a fresh packed-s16 resampler.

     The layout is 'stereo' when ``CHANNELS`` is at least 2 and 'mono'
     otherwise; the rate is ``SAMPLING_RATE``.
     """
     sample_format = av.AudioFormat('s16').packed
     if CHANNELS >= 2:
         channel_layout = 'stereo'
     else:
         channel_layout = 'mono'
     self.Resampler = av.AudioResampler(
         format=sample_format, layout=channel_layout, rate=SAMPLING_RATE)
示例#14
0
    def _enqueue(self, run, finished, filepath, vid_q, aud_q, vid_info,
                 *stindex):
        """Decode ``filepath`` and feed raw audio and BGR video to two queues.

        Args:
            run: Object whose ``wait()`` is called before every packet, so a
                controller can pause decoding (presumably a threading.Event).
            finished: Object whose ``set()`` is called once decoding is done.
            filepath: Media file handed to ``av.open``.
            vid_q: Queue receiving ``(frame, frame_ts, next_ts)`` tuples, where
                ``frame`` is the previously decoded BGR image (``None`` for the
                very first tuple) and ``next_ts`` is the time of the frame
                that has just been decoded.
            aud_q: Queue receiving raw packed s16 stereo audio bytes.
            vid_info: Mapping whose ``'rotate'`` entry (90, 180 or 270)
                selects the rotation applied to video frames.
            *stindex: Indexes of the container streams to demux.

        NOTE(review): each video frame is queued only when its successor has
        been decoded, so the last decoded frame is never put on ``vid_q``.
        """
        aud_resampler = av.AudioResampler(
            # ``.packed`` converts planar 's16p' to packed signed 16-bit PCM,
            # so plane 0 of a resampled frame carries every channel.
            format=av.AudioFormat('s16p').packed,
            layout='stereo',
        )

        def decode():
            # Single-argument print() calls behave the same on Python 2 and 3.
            print('started decoding and queueing')
            container = av.open(filepath)
            streams = [container.streams[indx] for indx in stindex]
            prev_video_frame = None
            prev_video_ts = None

            v_stream = container.streams.video[0]

            # Scale down to keep things fast.
            out_longest_side = max(self._vwidth, self._vheight)
            if v_stream.height > v_stream.width:
                scale_args = "w=min(%d,iw):h=-1:flags=area" % (
                    out_longest_side, )
            else:
                scale_args = "w=-1:h=min(%d,ih):flags=area" % (
                    out_longest_side, )

            # buffer -> format(bgr24) -> scale -> buffersink
            filtergraph = av.filter.Graph()
            v_src = filtergraph.add_buffer(template=v_stream)
            v_bgr = filtergraph.add("format", "pix_fmts=bgr24")
            v_scale = filtergraph.add("scale", scale_args)
            v_snk = filtergraph.add("buffersink")
            v_src.link_to(v_bgr)
            v_bgr.link_to(v_scale)
            v_scale.link_to(v_snk)

            for packet in container.demux(streams):
                run.wait()
                for frame in packet.decode():
                    # A pts of 0 or None yields None here; it is treated as
                    # time 0 wherever play_at is used below.
                    play_at = float(frame.time_base *
                                    frame.pts) if frame.pts else None
                    if isinstance(frame, av.AudioFrame):
                        frame_r = aud_resampler.resample(frame)
                        raw_audio = frame_r.planes[0].to_bytes()
                        aud_q.put(raw_audio)
                    elif isinstance(frame, av.VideoFrame):
                        # NOTE: use filtergraph to convert to bgr24 instead of
                        # frame.reformat(format='bgr24').
                        #
                        # For a yuv420p frame, with SIMD optimizations on,
                        # frame.reformat(format='bgr24') will fail to convert
                        # the last width%8 pixels on each row, leaving a
                        # stripe of uninitialized data down the right side.
                        #
                        # The problem is VideoFrame allocates buffers with
                        # align=1 instead of align=SIMD_width_of_cpu.
                        #
                        # libavfilter allocates buffers with align=32 so
                        # doing the bgr24 conversion via a filtergraph works.
                        v_src.push(frame)
                        frame_bgr = v_snk.pull()

                        # frame.to_nd_array() expects buffers to be align=1 so
                        # we have to do this by hand
                        plane = frame_bgr.planes[0]
                        dtype = numpy.uint8
                        bytes_per_pixel = 3
                        frame_h, frame_w = frame_bgr.height, frame_bgr.width
                        # Floor division keeps the padded row width an int on
                        # Python 3 as well; reshape() rejects a float.
                        buffer_w = plane.line_size // bytes_per_pixel
                        frame_bgr = numpy.frombuffer(plane, dtype).reshape(
                            frame_h, buffer_w, -1)[:frame_h, :frame_w]

                        vid_q.put((prev_video_frame, prev_video_ts, play_at
                                   or 0))
                        # Copy out of the libav buffer, applying the rotation.
                        if vid_info['rotate'] == 90:
                            prev_video_frame = numpy.rot90(frame_bgr.copy(),
                                                           k=-1)
                        elif vid_info['rotate'] == 180:
                            prev_video_frame = numpy.fliplr(
                                numpy.flipud(frame_bgr.copy()))
                        elif vid_info['rotate'] == 270:
                            prev_video_frame = numpy.rot90(frame_bgr.copy())
                        else:
                            prev_video_frame = frame_bgr.copy()
                        prev_video_ts = play_at or 0
                    else:
                        # Same text as the former two-argument print statement.
                        print('unknown frame %s' % (frame, ))
            print('finished decoding and queueing')

        decode()
        finished.set()
示例#15
0
# Remaining command-line options (arg_parser itself and the 'path' argument
# read below are defined outside this excerpt).  --format, --layout and --rate
# configure the resampler, and --size decides whether a FIFO is created; the
# use of --count is not visible in this excerpt.
arg_parser.add_argument('-d', '--data', action='store_true')
arg_parser.add_argument('-f', '--format')
arg_parser.add_argument('-l', '--layout')
arg_parser.add_argument('-r', '--rate', type=int)
arg_parser.add_argument('-s', '--size', type=int, default=1024)
arg_parser.add_argument('-c', '--count', type=int, default=5)
args = arg_parser.parse_args()

ffplay = None

# Open the media and pick its first audio stream; next() raises StopIteration
# when the container has no audio stream.
container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

# A FIFO is created only for a non-zero --size.
fifo = av.AudioFifo() if args.size else None
# A resampler is built only when at least one of --format/--layout/--rate was
# given.  The format is the packed variant of --format (None when omitted);
# an all-digit --layout is converted to int, otherwise passed through as given.
resampler = av.AudioResampler(
    format=av.AudioFormat(args.format or stream.format.name).packed if args.format else None,
    layout=int(args.layout) if args.layout and args.layout.isdigit() else args.layout,
    rate=args.rate,
) if (args.format or args.layout or args.rate) else None

# Running counters; only read_count is updated within this excerpt.
read_count = 0
fifo_count = 0
sample_count = 0

# Demux the selected audio stream, numbering and printing every decoded frame.
for i, packet in enumerate(container.demux(stream)):

    for frame in packet.decode():

        read_count += 1
        print('>>>> %04d' % read_count, frame)
        if args.data:
示例#16
0
def stream_doom(yturl: str, speed=None, noise=None):
  """Return a generator of doomified mp3 frames.

  The audio stream of ``yturl`` is resampled to a higher rate but encoded as
  if it still had the source rate, which stretches it in time, and is mixed
  with looping vinyl noise read from 'vinyl.wav' (stereo sources) or
  'vinylmono.wav' (mono sources).  The first packets carry 1.5 s of noise.

  Args:
    yturl: Location handed to ``av.open``.
    speed: Playback speed factor; the resample rate is the source rate times
      ``1 / speed``.  ``None`` uses a stretch factor of 1.4.
    noise: Share of the noise in the mix; ``None`` or 0 falls back to 0.1.
      The source signal is weighted with ``1 - noise``.

  Yields:
    bytes: Encoded MP3 packets.

  Raises:
    Exception: if the source has neither one nor two audio channels.
  """
  in_file = av.open(yturl, options={'rtsp_transport': 'tcp'})
  try:
    in_stream = in_file.streams.audio[0]
    in_codec = in_stream.codec_context

    out_codec = av.CodecContext.create('mp3', 'w')
    out_codec.rate = in_codec.rate
    out_codec.channels = in_codec.channels
    out_codec.format = in_codec.format

    # The stretch factor always scales the source rate.  Without the
    # parentheses an explicit ``speed`` produced a rate of just ``1 / speed``.
    stretch = 1.4 if speed is None else 1 / speed
    resampler = av.AudioResampler(
        format=av.AudioFormat('s16').packed,
        layout=in_codec.layout,
        rate=int(in_codec.rate * stretch),
    )

    if in_codec.channels == 2:
      nf = 'vinyl.wav'
    elif in_codec.channels == 1:
      nf = 'vinylmono.wav'
    else:
      # TODO: Support 5.1 and other configs
      raise Exception('Too many audio channels in stream')

    noise = noise or 0.1
    wet = 1 - noise

    def moving_average(a, n=3):
      """Mean over a sliding window of ``n``; the result is n - 1 shorter."""
      ret = np.cumsum(a, dtype=float)
      ret[n:] = ret[n:] - ret[:-n]
      return ret[n - 1:] / n

    with wave.open(nf, 'rb') as vinyl:
      # Intro: 1.5 seconds of pure vinyl noise.
      vinbuf = vinyl.readframes(int(out_codec.rate * 1.5))
      b = np.frombuffer(vinbuf, dtype='i2').reshape((1, -1))
      newframe = av.audio.frame.AudioFrame.from_ndarray(b, format='s16', layout=in_codec.layout.name)
      newframe.rate = out_codec.rate
      for p in out_codec.encode(newframe):
        yield p.to_bytes()

      bytes_per_frame = vinyl.getnchannels() * vinyl.getsampwidth()

      for packet in in_file.demux(in_stream):
        for frame in packet.decode():
          frame.pts = None
          buf = resampler.resample(frame).to_ndarray()[0]
          # reading in a frame of the vinyl; start over when the noise file
          # cannot supply the full amount.  The expected size is counted in
          # bytes (frames * channels * sample width) so that a short read is
          # also detected for mono sources.
          frames_needed = len(buf) // in_codec.channels
          vinbuf = vinyl.readframes(frames_needed)
          if len(vinbuf) < frames_needed * bytes_per_frame:
            vinyl.rewind()
            vinbuf = vinyl.readframes(frames_needed)
          a = buf * wet
          b = np.frombuffer(vinbuf, dtype='i2') * noise
          mod = moving_average(a + b, n=7).astype('i2').reshape((1, -1))

          newframe = av.audio.frame.AudioFrame.from_ndarray(mod, format='s16', layout=in_codec.layout.name)
          # Label the stretched samples with the source rate.
          newframe.rate = out_codec.rate
          for p in out_codec.encode(newframe):
            yield p.to_bytes()

      # Flush the encoder.  Passing the last frame again, as before, encoded
      # it twice and left buffered packets behind.
      for p in out_codec.encode(None):
        yield p.to_bytes()
  finally:
    # Also runs when the consumer abandons the generator early.
    in_file.close()
示例#17
0
    def init_audio_sink(self):
        """Open the PyAudio output and set up decoder, resampler and delay.

        Creates ``self.pa`` / ``self.sink`` (16-bit-wide output at
        ``self.sample_rate`` with ``self.channel_count`` channels), selects a
        decoder from the format name in ``self.af``, configures
        ``self.codecContext``, builds ``self.resampler`` (packed, stereo) and
        stores the summed output latency in ``self.sample_delay`` (seconds).
        """
        # Codec latency is currently not accounted for; the former per-codec
        # values (ELD, AAC_LC) were found not to be required.
        codecLatencySec = 0
        self.pa = pyaudio.PyAudio()
        self.sink = self.pa.open(format=self.pa.get_format_from_width(2),
                                 channels=self.channel_count,
                                 rate=self.sample_rate,
                                 output=True)
        # nice Python3 crash if we don't check self.sink is null. Not harmful, but should check.
        if not self.sink:
            exit()

        # ALAC needs decoder extradata describing rate, bit depth and channels.
        # NOTE(review): ``self`` is passed explicitly to a call that already
        # goes through ``self`` -- confirm set_alac_extradata's signature.
        extradata = None
        if self.audio_format == AirplayAudFmt.ALAC_44100_16_2.value:
            extradata = self.set_alac_extradata(self, 44100, 16, 2)
        elif self.audio_format == AirplayAudFmt.ALAC_44100_24_2.value:
            extradata = self.set_alac_extradata(self, 44100, 24, 2)
        elif self.audio_format == AirplayAudFmt.ALAC_48000_16_2.value:
            extradata = self.set_alac_extradata(self, 48000, 16, 2)
        elif self.audio_format == AirplayAudFmt.ALAC_48000_24_2.value:
            extradata = self.set_alac_extradata(self, 48000, 24, 2)

        if 'ALAC' in self.af:
            self.codec = av.codec.Codec('alac', 'r')
        elif 'AAC' in self.af:
            self.codec = av.codec.Codec('aac', 'r')
        elif 'OPUS' in self.af:
            self.codec = av.codec.Codec('opus', 'r')
        # PCM: both substrings must be present.  The former
        # ``'PCM' and '_16_' in self.af`` only ever tested the second one.
        elif 'PCM' in self.af and '_16_' in self.af:
            self.codec = av.codec.Codec('pcm_s16le_planar', 'r')
        elif 'PCM' in self.af and '_24_' in self.af:
            self.codec = av.codec.Codec('pcm_s24le', 'r')

        # getattr: no branch above may have matched, in which case
        # ``self.codec`` might never have been assigned.
        if getattr(self, 'codec', None) is not None:
            self.codecContext = av.codec.CodecContext.create(self.codec)
            self.codecContext.sample_rate = self.sample_rate
            self.codecContext.channels = self.channel_count
            self.codecContext.format = av.AudioFormat('s' +
                                                      str(self.sample_size) +
                                                      'p')
        if extradata is not None:
            self.codecContext.extradata = extradata

        self.resampler = av.AudioResampler(
            format=av.AudioFormat('s' + str(self.sample_size)).packed,
            layout='stereo',
            rate=self.sample_rate,
        )

        audioDevicelatency = \
            self.pa.get_default_output_device_info()['defaultHighOutputLatency']
        # defaultLowOutputLatency is also available
        print(f"audioDevicelatency (sec): {audioDevicelatency:0.5f}")
        pyAudioDelay = self.sink.get_output_latency()
        print(f"pyAudioDelay (sec): {pyAudioDelay:0.5f}")
        ptpDelay = 0.002
        self.sample_delay = pyAudioDelay + audioDevicelatency + codecLatencySec + ptpDelay
        print(f"Total sample_delay (sec): {self.sample_delay:0.5f}")
示例#18
0
    def init_audio_sink(self):
        """Open a stereo 44100 Hz PyAudio output and prepare decoding.

        Picks an ALAC or AAC decoder according to ``self.audio_format``
        (leaving ``self.codecContext`` untouched for any other format) and
        always creates ``self.resampler`` for packed s16 stereo at 44100 Hz.
        """
        print("audit")
        self.pa = pyaudio.PyAudio()
        sample_width_format = self.pa.get_format_from_width(2)
        self.sink = self.pa.open(format=sample_width_format,
                                 channels=2,
                                 rate=44100,
                                 output=True)

        decoder = None
        decoder_config = None
        if self.audio_format == Audio.AudioFormat.ALAC_44100_16_2.value:
            # 36 bytes of decoder extradata.  They appear to form an ALAC
            # configuration record: bytes 4-7 spell 'alac' and the last two
            # bytes, 0xAC44, equal 44100.
            decoder_config = bytes.fromhex(
                '00000024' '616c6163' '00000000'
                '00000160' '0010280a' '0e0200ff'
                '00000000' '00000000' '0000ac44')
            decoder = av.codec.Codec('alac', 'r')
        elif self.audio_format == Audio.AudioFormat.AAC_LC_44100_2.value:
            decoder = av.codec.Codec('aac', 'r')

        if decoder is not None:
            context = av.codec.CodecContext.create(decoder)
            self.codecContext = context
            context.sample_rate = 44100
            context.channels = 2
            context.format = AudioFormat('s16p')
        if decoder_config is not None:
            self.codecContext.extradata = decoder_config

        packed_s16 = av.AudioFormat('s16').packed
        self.resampler = av.AudioResampler(
            format=packed_s16, layout='stereo', rate=44100)
示例#19
0
    def play_video(self, avi_file):
        """Play ``avi_file`` (relative to the game path) with sound.

        Returns ``False`` without playing when PyAV cannot be imported, when
        ``config['enable_avi_play']`` is off, or when the file does not exist;
        returns ``True`` after playback finished or was interrupted by a key
        press.  The display is switched to 32-bit for playback and back to
        8-bit (with the palette restored) afterwards.
        """
        try:
            import av
        except ImportError:
            return False
        if not config['enable_avi_play']:
            return False

        avi_file = os.path.join(config['game_path'], avi_file)
        if not os.path.exists(avi_file):
            return False

        # Switch the display to 32 bits per pixel for true-colour frames.
        self.screen_real = pg.display.set_mode(
            self.screen_real.get_size(), self.screen_real.get_flags(), 32)

        # Pass 1: decode the whole audio track into an in-memory WAV file
        # (packed s16 stereo) and hand it to the mixer.
        video = av.open(avi_file,
                        metadata_encoding=encoding,
                        metadata_errors='replace')
        astream = next(s for s in video.streams if s.type == 'audio')
        wav_buffer = BytesIO()
        wav = wave.open(wav_buffer, 'wb')
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16').packed,
            layout='stereo',
            rate=config['samplerate'],
        )
        wav.setparams(
            (2, 2, config['samplerate'], 0, 'NONE', "not compressed"))
        for packet in video.demux(astream):
            for audio_frame in packet.decode():
                resampled = resampler.resample(audio_frame)
                wav.writeframes(resampled.planes[0].to_bytes())
        wav.close()
        wav_buffer.seek(0)
        pg.mixer.music.load(wav_buffer)

        # Pass 2: reopen the file and show the video frames while the mixer
        # plays the audio.
        video = av.open(avi_file,
                        metadata_encoding=encoding,
                        metadata_errors='replace')
        vstream = next(s for s in video.streams if s.type == 'video')
        frame_interval = int(round(1000 / vstream.rate))
        pg.mixer.music.play()
        self.clear_key_state()
        # Without pg.image.frombuffer, frames go through an in-memory BMP.
        use_bmp_fallback = not hasattr(pg.image, 'frombuffer')

        try:
            for packet in video.demux(vstream):
                for frame in packet.decode():
                    size = self.screen_real.get_size()
                    frame_start = pg.time.get_ticks()
                    if use_bmp_fallback:
                        bmp_file = BytesIO()
                        frame.to_image().save(bmp_file, 'bmp')
                        bmp_file.seek(0)
                        picture = pg.image.load(bmp_file)
                    else:
                        rgb_bytes = frame.to_rgb().planes[0].to_bytes()
                        picture = pg.image.frombuffer(
                            rgb_bytes, (288, 180), 'RGB')
                    self.screen_real.blit(
                        pg.transform.smoothscale(picture, size), (0, 0))
                    pg.display.flip()

                    self.delay_until(frame_start + frame_interval)
                    # Any key press aborts playback via the handler below.
                    if self.input_state.key_press:
                        raise KeyboardInterrupt
        except KeyboardInterrupt:
            pass
        finally:
            self.clear_key_state()
            if pg.mixer.get_init():
                pg.mixer.music.pause()

        # Back to the 8-bit paletted display used by the game.
        self.screen_real = pg.display.set_mode(
            self.screen_real.get_size(), self.screen_real.get_flags(), 8)
        self.set_palette(self.num_palette, self.night_palette)
        return True