def app_loopback():
    webrtc_streamer(
        key="loopback",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_transformer_factory=None,  # NoOp
    )
def app_audio_filter():
    gain = st.slider("Gain", -10.0, +20.0, 1.0, 0.05)

    def process_audio(frame: av.AudioFrame) -> av.AudioFrame:
        raw_samples = frame.to_ndarray()
        sound = pydub.AudioSegment(
            data=raw_samples.tobytes(),
            sample_width=frame.format.bytes,
            frame_rate=frame.sample_rate,
            channels=len(frame.layout.channels),
        )

        sound = sound.apply_gain(gain)

        # Ref: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples  # noqa
        channel_sounds = sound.split_to_mono()
        channel_samples = [s.get_array_of_samples() for s in channel_sounds]
        new_samples: np.ndarray = np.array(channel_samples).T
        new_samples = new_samples.reshape(raw_samples.shape)

        new_frame = av.AudioFrame.from_ndarray(new_samples, layout=frame.layout.name)
        new_frame.sample_rate = frame.sample_rate
        return new_frame

    webrtc_streamer(
        key="audio-filter",
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=RTC_CONFIGURATION,
        audio_frame_callback=process_audio,
        async_processing=True,
    )
def app():
    class OpenCVEdgeProcessor(VideoProcessorBase):
        def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
            img = frame.to_ndarray(format="bgr24")

            # perform edge detection
            img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR)

            return av.VideoFrame.from_ndarray(img, format="bgr24")

    def in_recorder_factory() -> MediaRecorder:
        return MediaRecorder(
            "input.flv", format="flv"
        )  # HLS does not work. See https://github.com/aiortc/aiortc/issues/331

    def out_recorder_factory() -> MediaRecorder:
        return MediaRecorder("output.flv", format="flv")

    webrtc_streamer(
        key="loopback",
        mode=WebRtcMode.SENDRECV,
        rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
        media_stream_constraints={
            "video": True,
            "audio": True,
        },
        video_processor_factory=OpenCVEdgeProcessor,
        in_recorder_factory=in_recorder_factory,
        out_recorder_factory=out_recorder_factory,
    )
def app_delayed_echo():
    delay = st.slider("Delay", 0.0, 5.0, 1.0, 0.05)

    async def queued_video_frames_callback(
        frames: List[av.VideoFrame],
    ) -> List[av.VideoFrame]:
        logger.debug("Delay: %f", delay)
        # A standalone `await ...` is interpreted as an expression and becomes
        # the target of Streamlit magic, which leads to implicit calls of `st.write`.
        # To prevent that, assign the result (`_ = await ...`) so it is a statement.
        # See https://discuss.streamlit.io/t/issue-with-asyncio-run-in-streamlit/7745/15
        _ = await asyncio.sleep(delay)
        return frames

    async def queued_audio_frames_callback(
        frames: List[av.AudioFrame],
    ) -> List[av.AudioFrame]:
        _ = await asyncio.sleep(delay)
        return frames

    webrtc_streamer(
        key="delay",
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=RTC_CONFIGURATION,
        queued_video_frames_callback=queued_video_frames_callback,
        queued_audio_frames_callback=queued_audio_frames_callback,
        async_processing=True,
    )
def app_loopback(): """ Simple video loopback """ webrtc_streamer( key="loopback", mode=WebRtcMode.SENDRECV, client_settings=WEBRTC_CLIENT_SETTINGS, video_processor_factory=None, # NoOp )
def live_mode():
    """Simple video loopback"""
    webrtc_streamer(
        key="loopback",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_transformer_factory=None,  # NoOp
    )
def app_video_filters(): """Video transforms with OpenCV""" _type = st.radio("Select transform type", ("noop", "cartoon", "edges", "rotate")) def callback(frame: av.VideoFrame) -> av.VideoFrame: img = frame.to_ndarray(format="bgr24") if _type == "noop": pass elif _type == "cartoon": # prepare color img_color = cv2.pyrDown(cv2.pyrDown(img)) for _ in range(6): img_color = cv2.bilateralFilter(img_color, 9, 9, 7) img_color = cv2.pyrUp(cv2.pyrUp(img_color)) # prepare edges img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) img_edges = cv2.adaptiveThreshold( cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2, ) img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB) # combine color and edges img = cv2.bitwise_and(img_color, img_edges) elif _type == "edges": # perform edge detection img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR) elif _type == "rotate": # rotate image rows, cols, _ = img.shape M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1) img = cv2.warpAffine(img, M, (cols, rows)) return av.VideoFrame.from_ndarray(img, format="bgr24") webrtc_streamer( key="opencv-filter", mode=WebRtcMode.SENDRECV, rtc_configuration=RTC_CONFIGURATION, video_frame_callback=callback, media_stream_constraints={"video": True, "audio": False}, async_processing=True, ) st.markdown( "This demo is based on " "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501 "Many thanks to the project." )
def app_streaming(): """ Media streamings """ MEDIAFILES = { "big_buck_bunny_720p_2mb.mp4": { "url": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_2mb.mp4", # noqa: E501 "local_file_path": HERE / "data/big_buck_bunny_720p_2mb.mp4", "type": "video", }, "big_buck_bunny_720p_10mb.mp4": { "url": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_10mb.mp4", # noqa: E501 "local_file_path": HERE / "data/big_buck_bunny_720p_10mb.mp4", "type": "video", }, "file_example_MP3_700KB.mp3": { "url": "https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_700KB.mp3", # noqa: E501 "local_file_path": HERE / "data/file_example_MP3_700KB.mp3", "type": "audio", }, "file_example_MP3_5MG.mp3": { "url": "https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_5MG.mp3", # noqa: E501 "local_file_path": HERE / "data/file_example_MP3_5MG.mp3", "type": "audio", }, } media_file_label = st.radio("Select a media file to stream", tuple(MEDIAFILES.keys())) media_file_info = MEDIAFILES[media_file_label] download_file(media_file_info["url"], media_file_info["local_file_path"]) def create_player(): return MediaPlayer(str(media_file_info["local_file_path"])) # NOTE: To stream the video from webcam, use the code below. # return MediaPlayer( # "1:none", # format="avfoundation", # options={"framerate": "30", "video_size": "1280x720"}, # ) WEBRTC_CLIENT_SETTINGS.update({ "media_stream_constraints": { "video": media_file_info["type"] == "video", "audio": media_file_info["type"] == "audio", } }) webrtc_streamer( key=f"media-streaming-{media_file_label}", mode=WebRtcMode.RECVONLY, client_settings=WEBRTC_CLIENT_SETTINGS, player_factory=create_player, )
def app_programatically_play():
    """A sample of controlling the playing state from Python."""
    playing = st.checkbox("Playing", value=True)

    webrtc_streamer(
        key="programatic_control",
        desired_playing_state=playing,
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=RTC_CONFIGURATION,
    )
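# A minimal sketch, not part of the original samples: the context returned by
# `webrtc_streamer` exposes the current playing state, so the state driven by
# `desired_playing_state` can also be read back in Python. `ctx.state.playing`
# is the same attribute used by other samples in this collection; the key and
# function name below are illustrative.
def app_playing_state_readback():
    playing = st.checkbox("Playing", value=True)

    ctx = webrtc_streamer(
        key="playing-state-readback",
        desired_playing_state=playing,
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=RTC_CONFIGURATION,
    )

    # Reflect the actual streamer state back into the page.
    st.write("Streaming" if ctx.state.playing else "Stopped")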
def app_fork():
    COMMON_RTC_CONFIG = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}

    ctx = webrtc_streamer(
        key="loopback",
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=COMMON_RTC_CONFIG,
        media_stream_constraints={
            "video": True,
            "audio": True,
        },
    )

    filter1_ctx = webrtc_streamer(
        key="filter1",
        mode=WebRtcMode.RECVONLY,
        video_processor_factory=OpenCVVideoProcessor,
        source_video_track=ctx.output_video_track,
        desired_playing_state=ctx.state.playing,
        rtc_configuration=COMMON_RTC_CONFIG,
        media_stream_constraints={
            "video": True,
            "audio": True,
        },
    )

    if filter1_ctx.video_processor:
        filter1_ctx.video_processor.type = st.radio(
            "Select transform type",
            ("noop", "cartoon", "edges", "rotate"),
            key="filter1-type",
        )

    filter2_ctx = webrtc_streamer(
        key="filter2",
        mode=WebRtcMode.RECVONLY,
        video_processor_factory=OpenCVVideoProcessor,
        source_video_track=ctx.output_video_track,
        desired_playing_state=ctx.state.playing,
        rtc_configuration=COMMON_RTC_CONFIG,
        media_stream_constraints={
            "video": True,
            "audio": True,
        },
    )

    if filter2_ctx.video_processor:
        filter2_ctx.video_processor.type = st.radio(
            "Select transform type",
            ("noop", "cartoon", "edges", "rotate"),
            key="filter2-type",
        )
def app_customize_ui_texts():
    webrtc_streamer(
        key="custom_ui_texts",
        rtc_configuration=RTC_CONFIGURATION,
        translations={
            "start": "開始",  # "Start"
            "stop": "停止",  # "Stop"
            "select_device": "デバイス選択",  # "Select device"
            "media_api_not_available": "Media APIが利用できない環境です",  # "The Media API is not available in this environment"
            "device_ask_permission": "メディアデバイスへのアクセスを許可してください",  # "Please allow access to the media devices"
            "device_not_available": "メディアデバイスを利用できません",  # "The media devices are not available"
            "device_access_denied": "メディアデバイスへのアクセスが拒否されました",  # "Access to the media devices was denied"
        },
    )
def app_delayed_echo():
    DEFAULT_DELAY = 1.0

    class VideoProcessor(VideoProcessorBase):
        delay = DEFAULT_DELAY

        async def recv_queued(self, frames: List[av.VideoFrame]) -> List[av.VideoFrame]:
            logger.debug("Delay: %f", self.delay)
            await asyncio.sleep(self.delay)
            return frames

    class AudioProcessor(AudioProcessorBase):
        delay = DEFAULT_DELAY

        async def recv_queued(self, frames: List[av.AudioFrame]) -> List[av.AudioFrame]:
            await asyncio.sleep(self.delay)
            return frames

    webrtc_ctx = webrtc_streamer(
        key="delay",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_processor_factory=VideoProcessor,
        audio_processor_factory=AudioProcessor,
        async_processing=True,
    )

    if webrtc_ctx.video_processor and webrtc_ctx.audio_processor:
        delay = st.slider("Delay", 0.0, 5.0, DEFAULT_DELAY, 0.05)
        webrtc_ctx.video_processor.delay = delay
        webrtc_ctx.audio_processor.delay = delay
def app_object_detection():
    class NNVideoTransformer(VideoTransformerBase):
        def __init__(self):
            # `MODEL` and `return_annotated_images` are assumed to be defined
            # elsewhere in this app (emotion model loading and drawing helpers).
            prototxtPath = os.path.sep.join(["face_detector", "deploy.prototxt"])
            weightsPath = os.path.sep.join(
                ["face_detector", "res10_300x300_ssd_iter_140000.caffemodel"]
            )
            self.faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
            self.emotionsNet = MODEL

        def transform(self, frame):
            image = frame.to_ndarray(format="bgr24")
            annotated_image = return_annotated_images(
                image, self.faceNet, self.emotionsNet
            )
            return annotated_image

    webrtc_ctx = webrtc_streamer(
        key="object-detection",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_transformer_factory=NNVideoTransformer,
        async_transform=True,
    )

    if webrtc_ctx.video_transformer:
        webrtc_ctx.video_transformer.confidence_threshold = 0.5
def app_mask_detection():
    """Video transforms with OpenCV"""

    class OpenCVVideoTransformer(VideoTransformerBase):
        type: Literal["basic", "sound_warnings"]

        def __init__(self) -> None:
            self.type = "basic"
            self.assembly = ModelAssembly()

        def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
            img = frame.to_ndarray(format="bgr24")  ## PIL ?
            return self.assembly.forwardFrame(
                img, soundOn=(self.type == "sound_warnings")
            )

    # TODO: audio play, camera focal calibration
    # transformer_type = st.radio(
    #     "Select features", ("basic", "sound_warnings", "cam_calib")
    # )

    webrtc_ctx = webrtc_streamer(
        key="opencv-filter",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_transformer_factory=OpenCVVideoTransformer,
        async_transform=True,
    )
def app_sendonly_video(): """A sample to use WebRTC in sendonly mode to transfer frames from the browser to the server and to render frames via `st.image`.""" webrtc_ctx = webrtc_streamer( key="video-sendonly", mode=WebRtcMode.SENDONLY, rtc_configuration=RTC_CONFIGURATION, media_stream_constraints={"video": True}, ) image_place = st.empty() while True: if webrtc_ctx.video_receiver: try: video_frame = webrtc_ctx.video_receiver.get_frame(timeout=1) except queue.Empty: logger.warning("Queue is empty. Abort.") break img_rgb = video_frame.to_ndarray(format="rgb24") image_place.image(img_rgb) else: logger.warning("AudioReciver is not set. Abort.") break
def app_media_constraints():
    """A sample to configure MediaStreamConstraints object"""
    frame_rate = 5
    webrtc_streamer(
        key="media-constraints",
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=RTC_CONFIGURATION,
        media_stream_constraints={
            "video": {"frameRate": {"ideal": frame_rate}},
        },
        video_html_attrs={
            "style": {"width": "50%", "margin": "0 auto", "border": "5px yellow solid"},
            "controls": False,
            "autoPlay": True,
        },
    )
    st.write(f"The frame rate is set as {frame_rate}. Video style is changed.")
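# A hedged sketch beyond the original sample: `media_stream_constraints` follows
# the browser's MediaStreamConstraints format, so an ideal resolution can be
# requested alongside the frame rate. The key and function name below are
# illustrative, and the browser treats these values as hints, not guarantees.
def app_media_constraints_with_resolution():
    """A sample requesting both frame rate and resolution constraints"""
    webrtc_streamer(
        key="media-constraints-resolution",
        mode=WebRtcMode.SENDRECV,
        rtc_configuration=RTC_CONFIGURATION,
        media_stream_constraints={
            "video": {
                "frameRate": {"ideal": 5},
                "width": {"ideal": 640},
                "height": {"ideal": 480},
            },
            "audio": False,
        },
    )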
def face_detect():
    class VideoTransformer(VideoTransformerBase):
        # `transform()` runs in another thread, so a lock is used for thread-safety.
        frame_lock: threading.Lock
        in_image: Union[np.ndarray, None]
        out_image: Union[np.ndarray, None]

        def __init__(self) -> None:
            self.frame_lock = threading.Lock()
            self.in_image = None
            self.out_image = None

        def transform(self, frame: av.VideoFrame) -> np.ndarray:
            in_image = frame.to_ndarray(format="bgr24")

            out_image = in_image[:, ::-1, :]  # Simple flipping for example.

            with self.frame_lock:
                self.in_image = in_image
                self.out_image = out_image

            return in_image

    ctx = webrtc_streamer(key="snapshot", video_transformer_factory=VideoTransformer)

    while ctx.video_transformer:
        with ctx.video_transformer.frame_lock:
            in_image = ctx.video_transformer.in_image
            out_image = ctx.video_transformer.out_image

        if in_image is not None:
            gray = cv2.cvtColor(in_image, cv2.COLOR_BGR2GRAY)
            faces = face_classifier.detectMultiScale(gray)

            for (x, y, w, h) in faces:
                a = cv2.rectangle(in_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
                roi_gray = gray[y:y + h, x:x + w]
                roi_gray = cv2.resize(
                    roi_gray, (48, 48), interpolation=cv2.INTER_AREA
                )  # Crop the face for prediction

                if np.sum([roi_gray]) != 0:
                    roi = roi_gray.astype("float") / 255.0
                    roi = img_to_array(roi)
                    roi = np.expand_dims(
                        roi, axis=0
                    )  # Reshape the cropped face image for prediction

                    prediction = classifier.predict(roi)[0]
                    label = emotion_labels[prediction.argmax()]
                    label_position = (x, y)
                    b = cv2.putText(
                        a,
                        label,
                        label_position,
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (0, 255, 0),
                        2,
                    )  # Add the label text
                    st.image(b, channels="BGR")
def emotion_find():
    webrtc_ctx = webrtc_streamer(
        key="loopback",
        mode=WebRtcMode.SENDONLY,
        # client_settings=WEBRTC_CLIENT_SETTINGS,
    )

    st.markdown("## Click here to activate me")
    if st.button("Activate EMP"):
        progress = st.progress(0)
        i = 0
        while int(time.time() - start_time) < capture_duration and i < 100:
            progress.progress(i + 1)
            i = i + 1
            # Find haar cascade to draw bounding box around face
            if webrtc_ctx.video_receiver:
                try:
                    video_frame = webrtc_ctx.video_receiver.get_frame(timeout=10)
                    facecasc = cv2.CascadeClassifier(
                        "haarcascade_frontalface_default.xml"
                    )
                    gray = cv2.cvtColor(
                        video_frame.to_ndarray(format="bgr24"), cv2.COLOR_BGR2GRAY
                    )
                    faces = facecasc.detectMultiScale(
                        gray, scaleFactor=1.3, minNeighbors=5
                    )

                    for (x, y, w, h) in faces:
                        # cv2.rectangle(video_frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
                        roi_gray = gray[y:y + h, x:x + w]
                        cropped_img = np.expand_dims(
                            np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0
                        )
                        prediction = model.predict(cropped_img)
                        maxindex = int(np.argmax(prediction))
                        emo.append(emotion_dict[maxindex])
                except queue.Empty:
                    time.sleep(0.1)
                    continue

        if not emo:
            st.markdown("## Face Not Detected. Try Again")
        else:
            def most_frequent(List):
                occurence_count = Counter(List)
                return occurence_count.most_common(1)[0][0]

            user_emotion = most_frequent(emo)
            st.markdown("## You are " + user_emotion)
            songs = [
                f
                for f in listdir("songs/" + user_emotion)
                if isfile(join("songs/" + user_emotion, f))
            ]
            for song in songs:
                st.markdown(song)
                st.audio("songs/" + user_emotion + "/" + song)
def app_audio_filter():
    DEFAULT_GAIN = 1.0

    class AudioProcessor(AudioProcessorBase):
        gain = DEFAULT_GAIN

        def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
            raw_samples = frame.to_ndarray()
            sound = pydub.AudioSegment(
                data=raw_samples.tobytes(),
                sample_width=frame.format.bytes,
                frame_rate=frame.sample_rate,
                channels=len(frame.layout.channels),
            )

            sound = sound.apply_gain(self.gain)

            # Ref: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples  # noqa
            channel_sounds = sound.split_to_mono()
            channel_samples = [s.get_array_of_samples() for s in channel_sounds]
            new_samples: np.ndarray = np.array(channel_samples).T
            new_samples = new_samples.reshape(raw_samples.shape)

            new_frame = av.AudioFrame.from_ndarray(
                new_samples, layout=frame.layout.name
            )
            new_frame.sample_rate = frame.sample_rate
            return new_frame

    webrtc_ctx = webrtc_streamer(
        key="audio-filter",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        audio_processor_factory=AudioProcessor,
        async_processing=True,
    )

    if webrtc_ctx.audio_processor:
        webrtc_ctx.audio_processor.gain = st.slider(
            "Gain", -10.0, +20.0, DEFAULT_GAIN, 0.05
        )
def app_sendonly(): """A sample to use WebRTC in sendonly mode to transfer frames from the browser to the server and to render frames via `st.image`.""" webrtc_ctx = webrtc_streamer( key="loopback", mode=WebRtcMode.SENDONLY, client_settings=WEBRTC_CLIENT_SETTINGS, ) if webrtc_ctx.video_receiver: image_loc = st.empty() while True: try: frame = webrtc_ctx.video_receiver.get_frame(timeout=1) except queue.Empty: print("Queue is empty. Stop the loop.") webrtc_ctx.video_receiver.stop() break img_rgb = frame.to_ndarray(format="rgb24") image_loc.image(img_rgb)
def app_slu():
    """Simple audio SLU"""
    webrtc_ctx = webrtc_streamer(
        key="audio_slu",
        mode=WebRtcMode.SENDONLY,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_transformer_factory=None,  # NoOp
    )

    if webrtc_ctx.audio_receiver:
        from plume.utils.transcribe import triton_transcribe_grpc_gen

        vad = VADUtterance()
        frame_len = st.empty()
        transcriber, audio_prep = triton_transcribe_grpc_gen(
            asr_host="101.53.142.218",
            asr_port=8001,
            asr_model="slu_wav2vec2",
            method="whole",
            sep=" ",
        )

        def frame_gen():
            while True:
                try:
                    frame = webrtc_ctx.audio_receiver.get_frame(timeout=1)
                    yield frame
                except queue.Empty:
                    print("Queue is empty. Stop the loop.")
                    webrtc_ctx.audio_receiver.stop()
                    break

        for voice_frame in vad.stream_utterance(frame_gen()):
            transcript = transcriber(audio_prep(voice_frame))
            frame_len.text(f"Transcript: {transcript}")
            # frame_len.text(
            #     f"received voice frame of duration {voice_frames.duration_seconds}"
            # )
    else:
        st.text("no audio receiver")
def app_sendonly_video(): """A sample to use WebRTC in sendonly mode to transfer frames from the browser to the server and to render frames via `st.image`.""" webrtc_ctx = webrtc_streamer( key="loopback", mode=WebRtcMode.SENDONLY, client_settings=WEBRTC_CLIENT_SETTINGS, ) image_place = st.empty() while True: if webrtc_ctx.video_receiver: try: video_frame = webrtc_ctx.video_receiver.get_frame(timeout=1) except queue.Empty: logger.warning("Queue is empty. Abort.") break img_rgb = video_frame.to_ndarray(format="rgb24") image_place.image(img_rgb) else: logger.warning("AudioReciver is not set. Abort.") break
def app_sendonly_audio(): """A sample to use WebRTC in sendonly mode to transfer audio frames from the browser to the server and visualize them with matplotlib and `st.pyplot`.""" webrtc_ctx = webrtc_streamer( key="sendonly-audio", mode=WebRtcMode.SENDONLY, audio_receiver_size=256, rtc_configuration=RTC_CONFIGURATION, media_stream_constraints={"audio": True}, ) fig_place = st.empty() fig, [ax_time, ax_freq] = plt.subplots( 2, 1, gridspec_kw={"top": 1.5, "bottom": 0.2} ) sound_window_len = 5000 # 5s sound_window_buffer = None while True: if webrtc_ctx.audio_receiver: try: audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1) except queue.Empty: logger.warning("Queue is empty. Abort.") break sound_chunk = pydub.AudioSegment.empty() for audio_frame in audio_frames: sound = pydub.AudioSegment( data=audio_frame.to_ndarray().tobytes(), sample_width=audio_frame.format.bytes, frame_rate=audio_frame.sample_rate, channels=len(audio_frame.layout.channels), ) sound_chunk += sound if len(sound_chunk) > 0: if sound_window_buffer is None: sound_window_buffer = pydub.AudioSegment.silent( duration=sound_window_len ) sound_window_buffer += sound_chunk if len(sound_window_buffer) > sound_window_len: sound_window_buffer = sound_window_buffer[-sound_window_len:] if sound_window_buffer: # Ref: https://own-search-and-study.xyz/2017/10/27/python%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6%E9%9F%B3%E5%A3%B0%E3%83%87%E3%83%BC%E3%82%BF%E3%81%8B%E3%82%89%E3%82%B9%E3%83%9A%E3%82%AF%E3%83%88%E3%83%AD%E3%82%B0%E3%83%A9%E3%83%A0%E3%82%92%E4%BD%9C/ # noqa sound_window_buffer = sound_window_buffer.set_channels( 1 ) # Stereo to mono sample = np.array(sound_window_buffer.get_array_of_samples()) ax_time.cla() times = (np.arange(-len(sample), 0)) / sound_window_buffer.frame_rate ax_time.plot(times, sample) ax_time.set_xlabel("Time") ax_time.set_ylabel("Magnitude") spec = np.fft.fft(sample) freq = np.fft.fftfreq(sample.shape[0], 1.0 / sound_chunk.frame_rate) freq = freq[: int(freq.shape[0] / 2)] spec = spec[: int(spec.shape[0] / 2)] spec[0] = spec[0] / 2 ax_freq.cla() ax_freq.plot(freq, np.abs(spec)) ax_freq.set_xlabel("Frequency") ax_freq.set_yscale("log") ax_freq.set_ylabel("Magnitude") fig_place.pyplot(fig) else: logger.warning("AudioReciver is not set. Abort.") break
def app_streaming(): """Media streamings""" MEDIAFILES = { "big_buck_bunny_720p_2mb.mp4 (local)": { "url": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_2mb.mp4", # noqa: E501 "local_file_path": HERE / "data/big_buck_bunny_720p_2mb.mp4", "type": "video", }, "big_buck_bunny_720p_10mb.mp4 (local)": { "url": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_10mb.mp4", # noqa: E501 "local_file_path": HERE / "data/big_buck_bunny_720p_10mb.mp4", "type": "video", }, "file_example_MP3_700KB.mp3 (local)": { "url": "https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_700KB.mp3", # noqa: E501 "local_file_path": HERE / "data/file_example_MP3_700KB.mp3", "type": "audio", }, "file_example_MP3_5MG.mp3 (local)": { "url": "https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_5MG.mp3", # noqa: E501 "local_file_path": HERE / "data/file_example_MP3_5MG.mp3", "type": "audio", }, "rtsp://wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov": { "url": "rtsp://wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov", "type": "video", }, } media_file_label = st.radio( "Select a media source to stream", tuple(MEDIAFILES.keys()) ) media_file_info = MEDIAFILES[media_file_label] if "local_file_path" in media_file_info: download_file(media_file_info["url"], media_file_info["local_file_path"]) def create_player(): if "local_file_path" in media_file_info: return MediaPlayer(str(media_file_info["local_file_path"])) else: return MediaPlayer(media_file_info["url"]) # NOTE: To stream the video from webcam, use the code below. # return MediaPlayer( # "1:none", # format="avfoundation", # options={"framerate": "30", "video_size": "1280x720"}, # ) key = f"media-streaming-{media_file_label}" ctx: Optional[WebRtcStreamerContext] = st.session_state.get(key) if media_file_info["type"] == "video" and ctx and ctx.state.playing: _type = st.radio( "Select transform type", ("noop", "cartoon", "edges", "rotate") ) else: _type = "noop" def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame: img = frame.to_ndarray(format="bgr24") if _type == "noop": pass elif _type == "cartoon": # prepare color img_color = cv2.pyrDown(cv2.pyrDown(img)) for _ in range(6): img_color = cv2.bilateralFilter(img_color, 9, 9, 7) img_color = cv2.pyrUp(cv2.pyrUp(img_color)) # prepare edges img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) img_edges = cv2.adaptiveThreshold( cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2, ) img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB) # combine color and edges img = cv2.bitwise_and(img_color, img_edges) elif _type == "edges": # perform edge detection img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR) elif _type == "rotate": # rotate image rows, cols, _ = img.shape M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1) img = cv2.warpAffine(img, M, (cols, rows)) return av.VideoFrame.from_ndarray(img, format="bgr24") webrtc_streamer( key=key, mode=WebRtcMode.RECVONLY, rtc_configuration=RTC_CONFIGURATION, media_stream_constraints={ "video": media_file_info["type"] == "video", "audio": media_file_info["type"] == "audio", }, player_factory=create_player, video_frame_callback=video_frame_callback, ) st.markdown( "The video filter in this demo is based on " "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501 "Many thanks to the project." )
def app_object_detection(): """Object detection demo with MobileNet SSD. This model and code are based on https://github.com/robmarkcole/object-detection-app """ MODEL_URL = "https://github.com/robmarkcole/object-detection-app/raw/master/model/MobileNetSSD_deploy.caffemodel" # noqa: E501 MODEL_LOCAL_PATH = HERE / "./models/MobileNetSSD_deploy.caffemodel" PROTOTXT_URL = "https://github.com/robmarkcole/object-detection-app/raw/master/model/MobileNetSSD_deploy.prototxt.txt" # noqa: E501 PROTOTXT_LOCAL_PATH = HERE / "./models/MobileNetSSD_deploy.prototxt.txt" CLASSES = [ "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", ] @st.experimental_singleton def generate_label_colors(): return np.random.uniform(0, 255, size=(len(CLASSES), 3)) COLORS = generate_label_colors() download_file(MODEL_URL, MODEL_LOCAL_PATH, expected_size=23147564) download_file(PROTOTXT_URL, PROTOTXT_LOCAL_PATH, expected_size=29353) DEFAULT_CONFIDENCE_THRESHOLD = 0.5 class Detection(NamedTuple): name: str prob: float @st.cache def get_model( session_id, ): # HACK: Pass session_id as an arg to make the cache session-specific return cv2.dnn.readNetFromCaffe(str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH)) net = get_model(get_session_id()) confidence_threshold = st.slider( "Confidence threshold", 0.0, 1.0, DEFAULT_CONFIDENCE_THRESHOLD, 0.05 ) def _annotate_image(image, detections): # loop over the detections (h, w) = image.shape[:2] result: List[Detection] = [] for i in np.arange(0, detections.shape[2]): confidence = detections[0, 0, i, 2] if confidence > confidence_threshold: # extract the index of the class label from the `detections`, # then compute the (x, y)-coordinates of the bounding box for # the object idx = int(detections[0, 0, i, 1]) box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) (startX, startY, endX, endY) = box.astype("int") name = CLASSES[idx] result.append(Detection(name=name, prob=float(confidence))) # display the prediction label = f"{name}: {round(confidence * 100, 2)}%" cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText( image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2, ) return image, result result_queue = ( queue.Queue() ) # TODO: A general-purpose shared state object may be more useful. def callback(frame: av.VideoFrame) -> av.VideoFrame: image = frame.to_ndarray(format="bgr24") blob = cv2.dnn.blobFromImage( cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5 ) net.setInput(blob) detections = net.forward() annotated_image, result = _annotate_image(image, detections) # NOTE: This `recv` method is called in another thread, # so it must be thread-safe. result_queue.put(result) # TODO: return av.VideoFrame.from_ndarray(annotated_image, format="bgr24") webrtc_ctx = webrtc_streamer( key="object-detection", mode=WebRtcMode.SENDRECV, rtc_configuration=RTC_CONFIGURATION, video_frame_callback=callback, media_stream_constraints={"video": True, "audio": False}, async_processing=True, ) if st.checkbox("Show the detected labels", value=True): if webrtc_ctx.state.playing: labels_placeholder = st.empty() # NOTE: The video transformation with object detection and # this loop displaying the result labels are running # in different threads asynchronously. # Then the rendered video frames and the labels displayed here # are not strictly synchronized. 
while True: try: result = result_queue.get(timeout=1.0) except queue.Empty: result = None labels_placeholder.table(result) st.markdown( "This demo uses a model and code from " "https://github.com/robmarkcole/object-detection-app. " "Many thanks to the project." )
def app_loopback(): """Simple video loopback""" webrtc_streamer(key="loopback")
def app_object_detection(): """Object detection demo with MobileNet SSD. This model and code are based on https://github.com/robmarkcole/object-detection-app """ MODEL_URL = "https://github.com/robmarkcole/object-detection-app/raw/master/model/MobileNetSSD_deploy.caffemodel" # noqa: E501 MODEL_LOCAL_PATH = HERE / "./models/MobileNetSSD_deploy.caffemodel" PROTOTXT_URL = "https://github.com/robmarkcole/object-detection-app/raw/master/model/MobileNetSSD_deploy.prototxt.txt" # noqa: E501 PROTOTXT_LOCAL_PATH = HERE / "./models/MobileNetSSD_deploy.prototxt.txt" CLASSES = [ "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", ] COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3)) download_file(MODEL_URL, MODEL_LOCAL_PATH, expected_size=23147564) download_file(PROTOTXT_URL, PROTOTXT_LOCAL_PATH, expected_size=29353) DEFAULT_CONFIDENCE_THRESHOLD = 0.5 class NNVideoTransformer(VideoTransformerBase): confidence_threshold: float def __init__(self) -> None: self._net = cv2.dnn.readNetFromCaffe( str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH) ) self.confidence_threshold = DEFAULT_CONFIDENCE_THRESHOLD def _annotate_image(self, image, detections): # loop over the detections (h, w) = image.shape[:2] labels = [] for i in np.arange(0, detections.shape[2]): confidence = detections[0, 0, i, 2] if confidence > self.confidence_threshold: # extract the index of the class label from the `detections`, # then compute the (x, y)-coordinates of the bounding box for # the object idx = int(detections[0, 0, i, 1]) box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) (startX, startY, endX, endY) = box.astype("int") # display the prediction label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%" labels.append(label) cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText( image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2, ) return image, labels def transform(self, frame: av.VideoFrame) -> np.ndarray: image = frame.to_ndarray(format="bgr24") blob = cv2.dnn.blobFromImage( cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5 ) self._net.setInput(blob) detections = self._net.forward() annotated_image, labels = self._annotate_image(image, detections) # TODO: Show labels return annotated_image webrtc_ctx = webrtc_streamer( key="object-detection", mode=WebRtcMode.SENDRECV, client_settings=WEBRTC_CLIENT_SETTINGS, video_transformer_factory=NNVideoTransformer, async_transform=True, ) confidence_threshold = st.slider( "Confidence threshold", 0.0, 1.0, DEFAULT_CONFIDENCE_THRESHOLD, 0.05 ) if webrtc_ctx.video_transformer: webrtc_ctx.video_transformer.confidence_threshold = confidence_threshold st.markdown( "This demo uses a model and code from " "https://github.com/robmarkcole/object-detection-app. " "Many thanks to the project." )
def app_video_filters(): """ Video transforms with OpenCV """ class OpenCVVideoTransformer(VideoTransformerBase): type: Literal["noop", "cartoon", "edges", "rotate"] def __init__(self) -> None: self.type = "noop" def transform(self, frame: av.VideoFrame) -> av.VideoFrame: img = frame.to_ndarray(format="bgr24") if self.type == "noop": pass elif self.type == "cartoon": # prepare color img_color = cv2.pyrDown(cv2.pyrDown(img)) for _ in range(6): img_color = cv2.bilateralFilter(img_color, 9, 9, 7) img_color = cv2.pyrUp(cv2.pyrUp(img_color)) # prepare edges img_edges = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) img_edges = cv2.adaptiveThreshold( cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2, ) img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB) # combine color and edges img = cv2.bitwise_and(img_color, img_edges) elif self.type == "edges": # perform edge detection img = cv2.cvtColor(cv2.Canny(img, 100, 200), cv2.COLOR_GRAY2BGR) elif self.type == "rotate": # rotate image rows, cols, _ = img.shape M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1) img = cv2.warpAffine(img, M, (cols, rows)) return img webrtc_ctx = webrtc_streamer( key="opencv-filter", mode=WebRtcMode.SENDRECV, client_settings=WEBRTC_CLIENT_SETTINGS, video_transformer_factory=OpenCVVideoTransformer, async_transform=True, ) transform_type = st.radio( "Select transform type", ("noop", "cartoon", "edges", "rotate") ) if webrtc_ctx.video_transformer: webrtc_ctx.video_transformer.type = transform_type st.markdown( "This demo is based on " "https://github.com/aiortc/aiortc/blob/2362e6d1f0c730a0f8c387bbea76546775ad2fe8/examples/server/server.py#L34. " # noqa: E501 "Many thanks to the project." )
        # (Tail of the VideoTransformer transform method defined above this excerpt.)
        img, list_clases = VideoTransformer.transform_(img=img)
        return img


# ==================================================================================================================
st.title("Detección automática de máscaras")  # "Automatic mask detection"
st.write("Esta aplicación identifica en tiempo real si tiene o no máscara.")  # "This app identifies in real time whether you are wearing a mask."
st.write("Para más información puede ir al siguiente enlace: ")  # "For more information you can go to the following link: "
st.write("Para más información: ")  # "For more information: "

status = st.sidebar.radio(
    "Elija subir imagen o acceder a la camara web",  # "Choose to upload an image or access the webcam"
    ("Subir imagen", "Camara web"),  # ("Upload image", "Webcam")
)

if status == "Camara web":
    webrtc_streamer(key="example", video_transformer_factory=VideoTransformer)
else:
    uploaded_file = st.file_uploader("Sube imagen", type=["png", "jpg", "jpeg"])  # "Upload image"
    if uploaded_file is not None:
        file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
        image = cv2.imdecode(file_bytes, 1)
        new_image, list_clases = VideoTransformer.transform_(img=image)
        text = (
            f"Hay {len(list_clases)} encontradas, "
            f"{len([x for x in list_clases if x > 0])} con máscara"
        )  # "Found {n} faces, {m} with a mask"
        st.image(new_image, caption=text, use_column_width=True, channels="BGR")
def app_sst_with_video(
    model_path: str, lm_path: str, lm_alpha: float, lm_beta: float, beam: int
):
    class AudioProcessor(AudioProcessorBase):
        frames_lock: threading.Lock
        frames: deque

        def __init__(self) -> None:
            self.frames_lock = threading.Lock()
            self.frames = deque([])

        async def recv_queued(self, frames: List[av.AudioFrame]) -> List[av.AudioFrame]:
            with self.frames_lock:
                self.frames.extend(frames)

            # Return empty frames to be silent.
            new_frames = []
            for frame in frames:
                input_array = frame.to_ndarray()
                new_frame = av.AudioFrame.from_ndarray(
                    np.zeros(input_array.shape, dtype=input_array.dtype),
                    layout=frame.layout.name,
                )
                new_frame.sample_rate = frame.sample_rate
                new_frames.append(new_frame)

            return new_frames

    webrtc_ctx = webrtc_streamer(
        key="speech-to-text-w-video",
        mode=WebRtcMode.SENDRECV,
        audio_processor_factory=AudioProcessor,
        rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
        media_stream_constraints={"video": True, "audio": True},
    )

    status_indicator = st.empty()

    if not webrtc_ctx.state.playing:
        return

    status_indicator.write("Loading...")
    text_output = st.empty()
    stream = None

    while True:
        if webrtc_ctx.audio_processor:
            if stream is None:
                from deepspeech import Model

                model = Model(model_path)
                model.enableExternalScorer(lm_path)
                model.setScorerAlphaBeta(lm_alpha, lm_beta)
                model.setBeamWidth(beam)

                stream = model.createStream()

                status_indicator.write("Model loaded.")

            sound_chunk = pydub.AudioSegment.empty()

            audio_frames = []
            with webrtc_ctx.audio_processor.frames_lock:
                while len(webrtc_ctx.audio_processor.frames) > 0:
                    frame = webrtc_ctx.audio_processor.frames.popleft()
                    audio_frames.append(frame)

            if len(audio_frames) == 0:
                time.sleep(0.1)
                status_indicator.write("No frame arrived.")
                continue

            status_indicator.write("Running. Say something!")

            for audio_frame in audio_frames:
                sound = pydub.AudioSegment(
                    data=audio_frame.to_ndarray().tobytes(),
                    sample_width=audio_frame.format.bytes,
                    frame_rate=audio_frame.sample_rate,
                    channels=len(audio_frame.layout.channels),
                )
                sound_chunk += sound

            if len(sound_chunk) > 0:
                sound_chunk = sound_chunk.set_channels(1).set_frame_rate(
                    model.sampleRate()
                )
                buffer = np.array(sound_chunk.get_array_of_samples())
                stream.feedAudioContent(buffer)
                text = stream.intermediateDecode()
                text_output.markdown(f"**Text:** {text}")
        else:
            status_indicator.write("AudioProcessor is not set. Abort.")
            break