def test_google_vision_face_batch():
    stims = ['apple', 'obama', 'thai_people']
    stim_files = [join(get_test_data_path(), 'image', '%s.jpg' % s)
                  for s in stims]
    stims = [ImageStim(s) for s in stim_files]
    ext = GoogleVisionAPIFaceExtractor(batch_size=5)
    result = ext.transform(stims)
    result = merge_results(result, format='wide', extractor_names=False,
                           handle_annotations='first')
    assert result.shape == (2, 139)
    assert 'joyLikelihood' in result.columns
    assert result['joyLikelihood'][0] == 'VERY_LIKELY'
    assert result['joyLikelihood'][1] == 'VERY_LIKELY'

    video = VideoStim(join(VIDEO_DIR, 'obama_speech.mp4'))
    conv = FrameSamplingFilter(every=10)
    video = conv.transform(video)
    result = ext.transform(video)
    result = merge_results(result, format='wide', extractor_names=False)
    assert 'joyLikelihood' in result.columns
    assert result.shape == (22, 139)

    video = VideoStim(join(VIDEO_DIR, 'small.mp4'))
    video = conv.transform(video)
    result = ext.transform(video)
    result = merge_results(result, format='wide', extractor_names=False)
    assert 'joyLikelihood' not in result.columns
    assert len(result) == 0
def test_frame_sampling_video_filter():
    filename = join(VIDEO_DIR, 'small.mp4')
    video = VideoStim(filename, onset=4.2)
    assert video.fps == 30
    assert video.n_frames in (167, 168)
    assert video.width == 560

    # Test frame filters
    conv = FrameSamplingFilter(every=3)
    derived = conv.transform(video)
    assert derived.n_frames == math.ceil(video.n_frames / 3.0)
    assert derived.duration == video.duration
    first = next(f for f in derived)
    assert type(first) == VideoFrameStim
    assert first.name == 'frame[0]'
    assert first.onset == 4.2
    assert first.duration == 3 * (1 / 30.0)
    second = [f for f in derived][1]
    assert second.onset == 4.3
    with pytest.raises(TypeError):
        derived.get_frame(onset=1.0)

    # Commented out because no longer allowing sampling filter chaining
    # conv = FrameSamplingFilter(hertz=15)
    # derived = conv.transform(derived)
    # assert derived.n_frames == math.ceil(video.n_frames / 6.0)
    # first = next(f for f in derived)
    # assert type(first) == VideoFrameStim
    # assert first.duration == 3 * (1 / 15.0)
    # second = [f for f in derived][1]
    # assert second.onset == 4.4

    with pytest.raises(TypeError):
        conv.transform(derived)
def test_google_vision_face_batch():
    obama_file = join(get_test_data_path(), 'image', 'obama.jpg')
    people_file = join(get_test_data_path(), 'image', 'thai_people.jpg')
    stims = [ImageStim(obama_file), ImageStim(people_file)]
    ext = GoogleVisionAPIFaceExtractor(handle_annotations='first')
    result = ext.transform(stims)
    result = ExtractorResult.merge_stims(result)
    assert 'face1_joyLikelihood' in result.columns
    assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY'
    assert result['face1_joyLikelihood'][1] == 'VERY_LIKELY'

    video = VideoStim(join(get_test_data_path(), 'video', 'obama_speech.mp4'))
    conv = FrameSamplingFilter(every=10)
    video = conv.transform(video)
    result = ext.transform(video)
    result = ExtractorResult.merge_stims(result)
    assert 'face1_joyLikelihood' in result.columns
    assert result.shape == (11, 137)

    video = VideoStim(join(get_test_data_path(), 'video', 'small.mp4'))
    video = conv.transform(video)
    result = ext.transform(video)
    result = ExtractorResult.merge_stims(result)
    assert 'face1_joyLikelihood' not in result.columns
    assert result.shape == (17, 7)
def extract_image_labels(video, save_frames=False):
    frame_sampling_filter = FrameSamplingFilter(hertz=1)
    sampled_video = frame_sampling_filter.transform(video)

    if save_frames:
        # Save frames as images
        for i, f in enumerate(sampled_video):
            if i % 100 == 0:
                f.save('stims/frames/frame_%d.png' % i)

    # Use a Vision API to extract object labels
    ext = GoogleVisionAPILabelExtractor(max_results=10)
    results = ext.transform(sampled_video)
    res = merge_results(results, metadata=False, extractor_names='multi')

    # Clean and write out data
    res = res.fillna(0)
    label_key = 'GoogleVisionAPILabelExtractor'
    res[label_key] = np.round(res[label_key])
    new_cols = []
    for col in res.columns.values:
        if col[0].startswith('Google'):
            new_cols.append(col[1].encode('utf-8'))
        else:
            new_cols.append(col[0])
    res.columns = new_cols
    res.to_csv('events/raw_visual_events.csv')
def resample_video(video_file, sampling_rate):
    """Resample a video to the desired sampling rate.

    Can be useful for making videos with high sampling rates more
    tractable for analysis.

    Parameters
    ----------
    video_file : str
        File path to the video to be resampled.
    sampling_rate : float
        Desired sampling rate in Hz.

    Returns
    -------
    resampled_video : pliers video object with resampled video frames
    """
    from pliers.stimuli import VideoStim
    from pliers.filters import FrameSamplingFilter

    video = VideoStim(video_file)
    resamp_filter = FrameSamplingFilter(hertz=sampling_rate)
    resampled_video = resamp_filter.transform(video)
    return resampled_video
def test_frame_sampling_cv2():
    pytest.importorskip('cv2')
    filename = join(VIDEO_DIR, 'small.mp4')
    video = VideoStim(filename)

    conv = FrameSamplingFilter(top_n=5)
    derived = conv.transform(video)
    assert derived.n_frames == 5
    assert type(next(f for f in derived)) == VideoFrameStim
def test_frame_sampling_video_filter2():
    filename = join(VIDEO_DIR, 'obama_speech.mp4')
    video = VideoStim(filename, onset=4.2)
    assert video.fps == 12
    assert video.n_frames == 105

    # Test frame indices
    conv = FrameSamplingFilter(every=3)
    derived = conv.transform(video)
    assert derived.n_frames == 35
    assert derived.frame_index[4] == 12

    conv = FrameSamplingFilter(hertz=3)
    derived = conv.transform(video)
    assert derived.n_frames == 27
    assert derived.frame_index[3] == 12

    conv = FrameSamplingFilter(hertz=24)
    derived = conv.transform(video)
    assert derived.n_frames == 210
    assert derived.frame_index[4] == 2

    video.fps = 11.8
    conv = FrameSamplingFilter(hertz=1)
    derived = conv.transform(video)
    assert derived.n_frames == 9
    assert derived.frame_index[4] == 47
    assert derived.frame_index[5] == 59
def test_big_pipeline():
    pytest.importorskip('pygraphviz')
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    visual_nodes = [(FrameSamplingFilter(every=15), [
        (TesseractConverter(), [LengthExtractor()]),
        VibranceExtractor(),
        'BrightnessExtractor',
    ])]
    audio_nodes = [(VideoToAudioConverter(),
                    [WitTranscriptionConverter(), 'LengthExtractor'],
                    'video_to_audio')]
    graph = Graph()
    graph.add_nodes(visual_nodes)
    graph.add_nodes(audio_nodes)
    results = graph.run(video, merge=False)
    result = merge_results(results, format='wide', extractor_names='multi')

    # Test that pygraphviz outputs a file
    drawfile = next(tempfile._get_candidate_names())
    graph.draw(drawfile)
    assert exists(drawfile)
    os.remove(drawfile)

    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim_name'].values
    assert 'frame[90]' in result['stim_name'].values
def test_progress_bar(capfd):
    video_dir = join(get_test_data_path(), 'video')
    video = VideoStim(join(video_dir, 'obama_speech.mp4'))
    conv = FrameSamplingFilter(hertz=2)

    old_val = config.get_option('progress_bar')
    config.set_option('progress_bar', True)

    derived = conv.transform(video)
    out, err = capfd.readouterr()
    assert 'Video frame:' in err and '100%' in err

    config.set_option('progress_bar', False)

    derived = conv.transform(video)
    out, err = capfd.readouterr()
    assert 'Video frame:' not in err and '100%' not in err

    config.set_option('progress_bar', old_val)
def test_frame_sampling_video_filter():
    filename = join(VIDEO_DIR, 'small.mp4')
    video = VideoStim(filename, onset=4.2)
    assert video.fps == 30
    assert video.n_frames in (167, 168)
    assert video.width == 560

    # Test frame filters
    conv = FrameSamplingFilter(every=3)
    derived = conv.transform(video)
    assert derived.n_frames == math.ceil(video.n_frames / 3.0)
    first = next(f for f in derived)
    assert type(first) == VideoFrameStim
    assert first.name == 'frame[0]'
    assert first.onset == 4.2
    assert first.duration == 3 * (1 / 30.0)
    second = [f for f in derived][1]
    assert second.onset == 4.3

    # Should refilter from original frames
    conv = FrameSamplingFilter(hertz=15)
    derived = conv.transform(derived)
    assert derived.n_frames == math.ceil(video.n_frames / 6.0)
    first = next(f for f in derived)
    assert type(first) == VideoFrameStim
    assert first.duration == 3 * (1 / 15.0)
    second = [f for f in derived][1]
    assert second.onset == 4.4
def extract_faces(video):
    frame_sampling_filter = FrameSamplingFilter(hertz=1)
    sampled_video = frame_sampling_filter.transform(video)

    ext = GoogleVisionAPIFaceExtractor()
    results = ext.transform(sampled_video)
    res = merge_results(results, metadata=False, format='long',
                        extractor_names=False, object_id=False)

    res = res[res['feature'] == 'face_detectionConfidence']
    res = res.drop(['order'], axis=1)
    res = res.fillna(0)
    res['value'] = np.round(res['value'])
    res.rename(columns={
        'value': 'modulation',
        'feature': 'trial_type'
    }, inplace=True)
    res.to_csv('events/visual_face_events.csv')
If we use this extractor to transform a `VideoStim`, *pliers* will implicitly use the `FrameSamplingConverter`. However, it will do so with the default parameters, which in this case would extract every frame. That is too fine-grained: for this example we want to save ourselves the computation and sample only infrequently.

To convert the `Sherlock` stimuli from video to `ImageStim` frames sampled at 0.1 Hz (i.e., one frame every 10 seconds), we need to:

First, load the `VideoStim` and use the `FrameSamplingFilter` to subsample this set of `ImageStims` at 0.1 Hz.

```python
from pliers.stimuli import VideoStim
from pliers.filters import FrameSamplingFilter

video = VideoStim(sherlock_video)

# Sample at 0.1 Hz
filt = FrameSamplingFilter(hertz=0.1)
selected_frames = filt.transform(video)

# Number of sampled frames
selected_frames.n_frames
```

```{note}
`FrameSamplingFilter` expects a *collection* of `ImageStims` as input, and returns a subsampled collection of `ImageStims`. However, here it can take a `VideoStim` as input, as *pliers* will *implicitly* convert `VideoStim` -> `ImageStim`. Since there are no important parameters to modify in this step, we can let *pliers* handle it for us, instead of doing it explicitly.
```

Next, we can use the `FaceRecognitionFaceLocationsExtractor` to detect and label face locations in the subset of frames. Note that since we transformed a collection of frames, the result of this operation is a *list* of `ExtractorResult` objects. To merge these objects into a single pandas DataFrame, we can use the helper function `merge_results`.
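As a minimal sketch of those two steps, reusing `selected_frames` from the filtering step above: the extractor and `merge_results` are called here with default arguments, so treat this as an illustration of the pattern rather than the tutorial's exact code.

```python
from pliers.extractors import (FaceRecognitionFaceLocationsExtractor,
                               merge_results)

# Detect face bounding boxes in each sampled frame; the result is a
# list of ExtractorResult objects, one per frame.
ext = FaceRecognitionFaceLocationsExtractor()
face_results = ext.transform(selected_frames)

# Collapse the list of per-frame results into a single pandas DataFrame,
# with one row per detected face and onsets inherited from the frames.
df = merge_results(face_results)
df.head()
```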