Example #1
    def do_set_caps(self, icaps, ocaps):
        in_info = GstAudio.AudioInfo()
        in_info.from_caps(icaps)
        out_info = GstVideo.VideoInfo()
        out_info.from_caps(ocaps)

        # Convert incoming samples to S32 so the plotted values span the full
        # 32-bit range used for the y-axis limits below.
        self.convert_info = GstAudio.AudioInfo()
        self.convert_info.set_format(GstAudio.AudioFormat.S32,
                                     in_info.rate,
                                     in_info.channels,
                                     in_info.position)
        self.converter = GstAudio.AudioConverter.new(GstAudio.AudioConverterFlags.NONE,
                                                     in_info,
                                                     self.convert_info,
                                                     None)

        self.fig = plt.figure()
        dpi = self.fig.get_dpi()
        self.fig.patch.set_alpha(0.3)
        self.fig.set_size_inches(out_info.width / float(dpi),
                                 out_info.height / float(dpi))
        self.ax = plt.Axes(self.fig, [0., 0., 1., 1.])
        self.fig.add_axes(self.ax)
        self.ax.set_axis_off()
        self.ax.set_ylim((GLib.MININT, GLib.MAXINT))
        self.agg = self.fig.canvas.switch_backends(FigureCanvasAgg)
        self.h = None

        # One window_duration of audio maps onto the frame width; thin the
        # samples so the ring buffer keeps roughly one value per output pixel.
        samplesperwindow = int(in_info.rate * in_info.channels * self.window_duration)
        self.thinning_factor = max(int(samplesperwindow / out_info.width - 1), 1)

        cap = int(samplesperwindow / self.thinning_factor)
        self.ax.set_xlim([0, cap])
        self.ringbuffer = RingBuffer(capacity=cap)
        self.ringbuffer.extend([0.0] * cap)
        # Duration of one output video frame: SECOND * fps_d / fps_n.
        self.frame_duration = Gst.util_uint64_scale_int(Gst.SECOND,
                                                        out_info.fps_d,
                                                        out_info.fps_n)
        self.next_time = self.frame_duration

        # Cache the empty background once so later frames can blit it back and
        # redraw only the waveform line.
        self.agg.draw()
        self.background = self.fig.canvas.copy_from_bbox(self.ax.bbox)

        # Number of audio samples consumed per produced video frame.
        self.samplesperbuffer = Gst.util_uint64_scale_int(in_info.rate * in_info.channels,
                                                          out_info.fps_d,
                                                          out_info.fps_n)
        self.next_offset = self.samplesperbuffer
        self.cur_offset = 0
        self.buf_offset = 0

        return True
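
This method is a snippet from a larger element (it closely resembles gst-python's audioplot example), so its imports and the RingBuffer type live elsewhere in the file. A minimal header that would make the snippet self-contained might look like the sketch below; taking RingBuffer from the numpy_ringbuffer package is an assumption.

import gi
gi.require_version('Gst', '1.0')
gi.require_version('GstAudio', '1.0')
gi.require_version('GstVideo', '1.0')
from gi.repository import Gst, GLib, GstAudio, GstVideo

import matplotlib
matplotlib.use('Agg')  # render off-screen; frames are copied into video buffers
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from numpy_ringbuffer import RingBuffer  # assumed origin of RingBuffer
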
Example #2
    def do_transform_ip(self, buffer: Gst.Buffer) -> Gst.FlowReturn:

        if not self._model:
            Gst.warning(
                f"No model specified for {self}. Plugin works in passthrough mode"
            )
            return Gst.FlowReturn.OK

        # print(buffer.get_size(), buffer.duration / 10**6)

        # map Gst.Buffer to READ content
        is_ok, map_info = buffer.map(Gst.MapFlags.READ)
        if is_ok:
            # parsing audio info
            # https://lazka.github.io/pgi-docs/GstAudio-1.0/classes/AudioInfo.html
            audio_info = GstAudio.AudioInfo()
            audio_info.from_caps(self.sinkpad.get_current_caps())

            # bpf = bytes per frame (for S16LE bpf = 2 bytes)
            # np.int16 -> due to audio format S16LE
            # https://lazka.github.io/pgi-docs/GstAudio-1.0/enums.html#GstAudio.AudioFormat.S16LE
            frame = np.ndarray(map_info.size // audio_info.bpf,
                               buffer=map_info.data,
                               dtype=np.int16)

            self._frames.append(frame)
            buffer.unmap(map_info)

            cut_on_silence = self._silence_threshold > 0
            num_frames = len(self._frames)
            if num_frames >= self._max_num_frames_seq:
                self._do_speech_recognition()
            else:
                if cut_on_silence and num_frames >= self._min_num_frames_seq:
                    # Cutter-style normalized cumulative square (NCS): square in
                    # float64 to avoid int16 overflow; 1 << 30 == (2 ** 15) ** 2
                    # is the largest possible square of an S16 sample.
                    square = np.square(frame.astype(np.float64))
                    peaksquare = np.max(square)
                    squaresum = np.sum(square)

                    normalizer = 1 << 30
                    ncs = float(squaresum) / normalizer

                    Gst.debug(f"ncs: {ncs}")

                    # A buffer counts as silent when its energy is below the
                    # threshold; enough consecutive silent buffers trigger
                    # recognition.
                    if ncs < self._silence_threshold:
                        self._silent_buffers_num += 1
                    else:
                        self._silent_buffers_num = 0

                    if self._silent_buffers_num >= self._silence_duration:
                        self._do_speech_recognition()

                    # self._amplitude_mean += np.mean(frame)
                    # self._amplitude_mean /= 2
                    # rms = np.sqrt(self._amplitude_mean ** 2)

                    # if num_frames >= self._min_num_frames_seq and rms < self._rms_threshold:
                    #     self._do_speech_recognition()

        return Gst.FlowReturn.OK
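
The 1 << 30 normalizer mirrors GStreamer's cutter element: the largest possible square of a signed 16-bit sample is (2 ** 15) ** 2 == 2 ** 30, so dividing the square sum by it yields the cutter-style "normalized cumulative square" (NCS). A standalone sketch of the same check on synthetic data, with a made-up threshold:

import numpy as np

def is_silent(frame, threshold=0.01):
    # NCS as above; square in float64 to avoid int16 overflow
    ncs = float(np.sum(np.square(frame.astype(np.float64)))) / (1 << 30)
    return ncs < threshold

rng = np.random.default_rng(0)
quiet = rng.normal(0, 50, 1600).astype(np.int16)      # near-silence
speech = rng.normal(0, 20000, 1600).astype(np.int16)  # speech-level energy
print(is_silent(quiet), is_silent(speech))  # True False
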
Example #3
    def __init__(self):
        GstBase.BaseSrc.__init__(self)
        self.info = GstAudio.AudioInfo()

        self.freq = DEFAULT_FREQ
        self.volume = DEFAULT_VOLUME
        self.mute = DEFAULT_MUTE

        # Behave like a live source and timestamp buffers in TIME format.
        self.set_live(DEFAULT_IS_LIVE)
        self.set_format(Gst.Format.TIME)
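
A GstBase.BaseSrc subclass like this only becomes usable once it is registered with GStreamer. A minimal registration sketch, assuming the class above is named AudioTestSrc and lives in a file loaded by the gst-python plugin loader (both names are assumptions):

from gi.repository import GObject, Gst

GObject.type_register(AudioTestSrc)
__gstelementfactory__ = ("py_audiotestsrc",  # assumed element name
                         Gst.Rank.NONE,
                         AudioTestSrc)
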
Example #4
    def __init__(self, buffer: Gst.Buffer, audio_info: GstAudio.AudioInfo = None, caps: Gst.Caps = None):
        self.__buffer = buffer
        self.__audio_info = None

        if audio_info:
            self.__audio_info = audio_info
        elif caps:
            self.__audio_info = GstAudio.AudioInfo()
            self.__audio_info.from_caps(caps)
        else:
            raise ValueError("One of audio_info or caps is required")
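
A usage sketch for this constructor; AudioFrame is an assumed name for the class, since only its __init__ is shown:

caps = Gst.Caps.from_string(
    'audio/x-raw,format=S16LE,rate=16000,channels=1,layout=interleaved')
buf = Gst.Buffer.new_allocate(None, 3200, None)  # 100 ms of S16 mono at 16 kHz
frame = AudioFrame(buf, caps=caps)  # AudioInfo is derived from the caps

# or, with a prebuilt AudioInfo:
info = GstAudio.AudioInfo()
info.from_caps(caps)
frame = AudioFrame(buf, audio_info=info)
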
Example #5
    def convert_speakers_to_bitmask(speaker_list):
        """Converts a list of speaker names into a Gstreamer bitmask.

        Args:
            speaker_list: A list of speaker pair names, e.g.
                ``[lr, c, lfe, lrs, lrtm]``

        Returns:
            num_channels: Number of audio channels in provided configuration.
            bitmask: A GStreamer bitmask for the specified configuration.

        """
        chs = []
        num_channels = 0
        for item in speaker_list:
            positions = AUDIO_CHANNEL_POSITIONS[item]
            chs.extend(positions)
            num_channels += len(positions)
        # Compute the mask once, after all positions have been collected.
        _, bitmask = GstAudio.audio_channel_positions_to_mask(chs, False)
        return num_channels, bitmask
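
A usage sketch, assuming AUDIO_CHANNEL_POSITIONS maps these pair names to lists of GstAudio.AudioChannelPosition values (e.g. 'lr' to front left/right); the returned mask can go straight into a caps string:

num_channels, bitmask = convert_speakers_to_bitmask(['lr', 'c', 'lfe', 'lrs'])
caps = Gst.Caps.from_string(
    'audio/x-raw,channels=%d,channel-mask=(bitmask)0x%x'
    % (num_channels, bitmask))  # a 5.1 layout: 6 channels
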
if __name__ == '__main__':
    data = CustomData()
    
    # https://lazka.github.io/pgi-docs/#Gst-1.0/functions.html#Gst.init
    print('initializing Gst...')
    Gst.init(None)

    if not data.create_elements():
        print('failed to create the elements or the pipeline')
        sys.exit(1)

    data.setup_wavescope()

    # https://lazka.github.io/pgi-docs/#GstAudio-1.0/classes/AudioInfo.html#GstAudio.AudioInfo.set_format
    info = GstAudio.AudioInfo()
    info.set_format(GstAudio.AudioFormat.S16, SAMPLE_RATE, 1, None)
    # https://lazka.github.io/pgi-docs/#GstAudio-1.0/classes/AudioInfo.html#GstAudio.AudioInfo.to_caps
    audio_caps = info.to_caps()
    data.configure_appsrc(audio_caps)
    data.configure_appsink(audio_caps)

    data.link_always_pads()
    data.link_request_pads()

    if not data.play_pipeline():
        sys.exit(1)

    data.run_main()

    print('disposing the data...')