def test_indico_api_text_extractor():
    """Check the columns and onsets produced by IndicoAPITextExtractor.

    Reads the API key from the ``INDICO_APP_KEY`` environment variable.
    """
    extractor = IndicoAPITextExtractor(
        api_key=os.environ['INDICO_APP_KEY'],
        models=['emotion', 'personality'])

    expected_features = {
        'onset',
        'duration',
        'emotion_anger',
        'emotion_fear',
        'emotion_joy',
        'emotion_sadness',
        'emotion_surprise',
        'personality_openness',
        'personality_extraversion',
        'personality_agreeableness',
        'personality_conscientiousness'}
    meta_columns = {'source_file', 'history', 'class', 'filename'}

    # With ComplexTextStim input
    srt_path = join(get_test_data_path(), 'text', 'wonderful.srt')
    complex_stim = ComplexTextStim(srt_path, onset=4.2)
    merged = ExtractorResult.merge_stims(extractor.transform(complex_stim))
    merged_columns = set(merged.columns) - {'stim_name'}
    assert merged_columns == expected_features | meta_columns
    assert merged['onset'][1] == 92.622

    # With TextStim input
    text_stim = TextStim(text="It's a wonderful life.")
    single = extractor.transform(text_stim).to_df()
    assert set(single.columns) == expected_features
    assert len(single) == 1
def _extract(self, stim):
    """Average the STFT of ``stim`` within each frequency bin over time.

    If ``self.freq_bins`` is an integer it is expanded, in place, into a
    list of ``(start, stop)`` column ranges of equal width over the
    second axis of the STFT output.

    Args:
        stim: Stim to analyse; ``stim.duration`` is read to build the
            time bins and the stim is passed to ``self._stft``.

    Returns:
        ExtractorResult with one row per time bin and one column per
        frequency bin (feature names are ``'<start>_<stop>'``).
    """
    data = self._stft(stim)
    time_bins = np.arange(0., stim.duration - self.frame_size, self.hop_size)

    if isinstance(self.freq_bins, int):
        # Floor division keeps the bin edges integral: they are used as
        # slice bounds below, and true division would yield floats that
        # cannot index an array.
        bin_size = data.shape[1] // self.freq_bins
        self.freq_bins = [(i * bin_size, (i + 1) * bin_size)
                          for i in range(self.freq_bins)]

    features = ['%d_%d' % fb for fb in self.freq_bins]
    index = list(time_bins)
    values = np.zeros((len(index), len(features)))
    for i, (start, stop) in enumerate(self.freq_bins):
        values[:, i] = data[:, start:stop].mean(1)
    # Replace NaN/inf values so the output contains only finite values.
    values = np.nan_to_num(values)
    return ExtractorResult(values, stim, self, features=features,
                           onsets=index, durations=self.hop_size)
def _extract(self, stim):
    """Classify ``stim.data`` with ``self.model`` and return top labels.

    The image is resized to 299x299x3 when needed, preprocessed with the
    Inception V3 helper, and the ``self.num_predictions`` most probable
    labels are returned as features with their probabilities as values.

    Raises:
        ValueError: if ``stim.data`` does not have exactly 3 dimensions.
    """
    target_shape = (299, 299, 3)
    image = stim.data

    if image.ndim != 3:
        raise ValueError(
            "Stim data must have rank 3 but got rank {}".format(image.ndim))

    if image.shape != target_shape:
        image = _resize_image(image, target_shape[:-1])

    # Add batch dimension.
    batch = image[None]
    # Normalize the features.
    batch = tf.keras.applications.inception_v3.preprocess_input(batch)
    predictions = self.model.predict(batch, batch_size=1)

    # The decoded output is a nested list: one sub-list per sample in the
    # batch, each holding `self.num_predictions` tuples of
    # `(ID, human-readable-label, probability)`.
    decoded_batch = tf.keras.applications.inception_v3.decode_predictions(
        predictions, top=self.num_predictions)
    # We assume there is only one sample in the batch.
    top_predictions = decoded_batch[0]

    probabilities = [entry[2] for entry in top_predictions]
    labels = [entry[1] for entry in top_predictions]
    return ExtractorResult([probabilities], stim, self, features=labels)
def _extract(self, stim):
    """Compute saliency maps for ``stim`` and summarise them.

    The full saliency map and its binarized version are stored on
    ``stim.derivatives``; the returned result holds the maximum saliency,
    the (y, x) position of the first maximum, and the fraction of the
    image flagged in the binarized map.
    """
    from pliers.external import pySaliencyMap
    # pySaliencyMap from https://github.com/akisato-/pySaliencyMap

    image = stim.data
    height, width, _ = image.shape
    saliency_model = pySaliencyMap.pySaliencyMap(height, width)

    # Compute saliency maps and store full maps as derivatives
    stim.derivatives = {}
    saliency_map = saliency_model.SMGetSM(image)
    stim.derivatives['saliency_map'] = saliency_map
    # thresholding done using Otsu
    binarized_map = saliency_model.SMGetBinarizedSM(image)
    stim.derivatives['binarized_map'] = binarized_map

    # Compute summary statistics
    max_saliency = np.max(saliency_map)
    max_y, max_x = [
        list(axis_hits)[0]
        for axis_hits in np.where(saliency_map == max_saliency)
    ]
    frac_high_saliency = np.sum(binarized_map / 255.0) / (height * width)

    feature_names = ['max_saliency', 'max_y', 'max_x', 'frac_high_saliency']
    summary = [max_saliency, max_y, max_x, frac_high_saliency]
    return ExtractorResult(np.array([summary]), stim, self,
                           features=feature_names)
def _extract(self, stim):
    """Compute the mean audio amplitude under each text element.

    Onsets and durations of ``stim.complex_text.elements`` are scaled by
    the audio sampling rate and used (rounded) as sample indices into
    ``stim.audio.data``.

    Raises:
        Exception: if an element ends beyond the end of the audio data.
    """
    samples = stim.audio.data
    rate = stim.audio.sampling_rate
    n_samples = samples.shape[0]

    values, onsets, durations = [], [], []
    for element in stim.complex_text.elements:
        onset = rate * element.onset
        onsets.append(onset)
        duration = rate * element.duration
        durations.append(duration)

        first = np.round(onset).astype(int)
        last = np.round(onset + duration).astype(int)
        if last > n_samples:
            raise Exception('Block ends after data.')

        values.append(np.mean(samples[first:last]))

    return ExtractorResult(values, stim, self, features=['mean_amplitude'],
                           onsets=onsets, durations=durations)
def _extract(self, stim):
    """Average the STFT of ``stim`` within each frequency bin over time.

    An integer ``self.freq_bins`` is expanded, in place, into a list of
    ``(start, stop)`` column ranges; the final range is stretched to the
    last column. Onsets are shifted by ``stim.onset`` when it is set, and
    non-finite values in the output are replaced by zero.
    """
    spectrogram = self._stft(stim)
    n_columns = spectrogram.shape[1]
    time_bins = np.arange(0., stim.duration - self.frame_size, self.hop_size)

    if isinstance(self.freq_bins, int):
        n_bins = self.freq_bins
        width = int(n_columns / n_bins)
        # The last bin absorbs whatever columns are left over.
        self.freq_bins = [
            (k * width, n_columns if k == n_bins - 1 else (k + 1) * width)
            for k in range(n_bins)
        ]

    features = ['%d_%d' % edges for edges in self.freq_bins]

    offset = stim.onset if stim.onset is not None else 0.0
    index = [tb + offset for tb in time_bins]

    values = np.zeros((len(index), len(features)))
    for column, (start, stop) in enumerate(self.freq_bins):
        values[:, column] = spectrogram[:, start:stop].mean(1)
    # Zero out NaN and +/-inf entries.
    values[~np.isfinite(values)] = 0.

    return ExtractorResult(values, stim, self, features=features,
                           onsets=index, durations=self.hop_size,
                           orders=list(range(len(index))))
def _extract(self, stim):
    """Query the API for one or more frames and pool the annotations.

    A single ``ImageStim`` is wrapped in a list. Features and values
    parsed from every response are concatenated into one row.

    Raises:
        Exception: with the API's message when a response carries an
            ``'error'`` entry instead of the expected response object.
    """
    if isinstance(stim, ImageStim):
        stim = [stim]

    responses = self._query_api(self._build_request(stim))

    features, data = [], []
    for response in responses:
        if response and self.response_object in response:
            feat, values = self._parse_annotations(
                response[self.response_object])
            features.extend(feat)
            data.extend(values)
        elif 'error' in response:
            raise Exception(response['error']['message'])

    n_responses = len(responses)
    # Fall back to the positional index when a frame has no onset attribute.
    onsets = [getattr(stim[k], 'onset', k) for k in range(n_responses)]
    durations = [stim[k].duration for k in range(n_responses)]

    return ExtractorResult([data], stim, self, features=features,
                           onsets=onsets, durations=durations)
def _extract(self, stim):
    """Build a per-frame result from the values of ``self._get_values``.

    For the ``'beat_track'`` feature only the second element of the
    returned values is kept. Frame onsets are derived from the hop length
    and the stim's sampling rate, shifted by ``stim.onset`` when truthy.
    """
    values = self._get_values(stim)
    if self._feature == 'beat_track':
        values = np.array(values[1])
    values = values.T
    n_frames = len(values)

    feature_names = listify(self.get_feature_names())

    frame_times = librosa.frames_to_time(range(n_frames),
                                         sr=stim.sampling_rate,
                                         hop_length=self.hop_length)
    if stim.onset:
        frame_times = frame_times + stim.onset

    frame_duration = self.hop_length / float(stim.sampling_rate)
    durations = [frame_duration] * n_frames

    return ExtractorResult(values, stim, self, features=feature_names,
                           onsets=frame_times, durations=durations,
                           orders=list(range(n_frames)))
def _extract(self, stim):
    """Compute total dense optical flow between consecutive frames.

    Each frame is converted to grayscale and compared with the previous
    one (the first frame is compared with itself). The per-pixel flow
    magnitudes are summed into a single ``total_flow`` value per frame.
    When ``self.show`` is truthy the magnitude image is displayed.
    """
    flows, onsets, durations = [], [], []
    previous = None

    for frame in stim:
        gray = cv2.cvtColor(frame.data, cv2.COLOR_BGR2GRAY)
        if previous is None:
            # First frame: no predecessor, so compare it with itself.
            previous = gray

        # Positional arguments after the two images: flow, pyr_scale,
        # levels, winsize, iterations, poly_n, poly_sigma, flags.
        flow = cv2.calcOpticalFlowFarneback(previous, gray, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)
        magnitude = np.sqrt(np.sum(flow ** 2, axis=2))

        if self.show:
            cv2.imshow('frame', magnitude.astype('int8'))
            cv2.waitKey(1)

        previous = gray
        flows.append(magnitude.sum())
        onsets.append(frame.onset)
        durations.append(frame.duration)

    return ExtractorResult(flows, stim, self, features=['total_flow'],
                           onsets=onsets, durations=durations)
def _extract(self, stim):
    """Return the analyzer's polarity scores for ``stim.text``.

    Each score key becomes a feature named ``'sentiment_<key>'``.
    """
    scores = self.analyzer.polarity_scores(stim.text)
    features, values = [], []
    for key, score in scores.items():
        features.append('sentiment_' + key)
        values.append(score)
    return ExtractorResult([values], stim, self, features=features)
def _extract(self, stims):
    """Score a batch of stims with every model and name the features.

    Each model in ``self.models`` is called once on the non-None stim
    data. A float score yields one feature named after the model; a dict
    score yields one feature per key, named ``'<model>_<key>'``.

    NOTE(review): stims whose data is None are dropped from the model
    input but still indexed positionally below -- confirm callers never
    pass such stims.
    """
    tokens = [stim.data for stim in stims if stim.data is not None]
    scores = [model(tokens) for model in self.models]

    results = []
    for i, stim in enumerate(stims):
        features, data = [], []
        for j, model_scores in enumerate(scores):
            entry = model_scores[i]
            if isinstance(entry, float):
                features.append(self.names[j])
                data.append(entry)
            elif isinstance(entry, dict):
                for key, value in entry.items():
                    features.append(self.names[j] + '_' + key)
                    data.append(value)
        result = ExtractorResult([data], stim, self, features=features,
                                 onsets=stim.onset, durations=stim.duration)
        results.append(result)
    return results
def _extract(self, stims):
    """Tag a batch of image stims with the configured clarifai model.

    Returns one ExtractorResult per output, with concept names as
    features and concept values as the data row.
    """
    verify_dependencies(['clarifai_client'])

    output_config = clarifai_client.ModelOutputConfig(
        min_value=self.min_value,
        max_concepts=self.max_concepts,
        select_concepts=self.select_concepts)
    model_output_info = clarifai_client.ModelOutputInfo(
        output_config=output_config)

    # ExitStack lets us use filename context managers simultaneously
    with ExitStack() as stack:
        filenames = [stack.enter_context(s.get_filename()) for s in stims]
        images = [clarifai_client.Image(filename=name) for name in filenames]
        tags = self.model.predict(images,
                                  model_output_info=model_output_info)

    extracted = []
    for i, output in enumerate(tags['outputs']):
        concept_entries = output['data']['concepts']
        names = [entry['name'] for entry in concept_entries]
        scores = [entry['value'] for entry in concept_entries]
        extracted.append(
            ExtractorResult([scores], stims[i], self, features=names))
    return extracted
def test_indico_api_image_extractor():
    """Check the columns and sample values from IndicoAPIImageExtractor.

    Reads the API key from the ``INDICO_APP_KEY`` environment variable.
    """
    extractor = IndicoAPIImageExtractor(
        api_key=os.environ['INDICO_APP_KEY'],
        models=['fer', 'content_filtering'])
    image_dir = join(get_test_data_path(), 'image')

    expected_features = {
        'onset',
        'duration',
        'fer_Surprise',
        'fer_Neutral',
        'fer_Sad',
        'fer_Happy',
        'fer_Angry',
        'fer_Fear',
        'content_filtering'}
    meta_columns = {'source_file', 'history', 'class', 'filename'}

    # Same image passed twice, results merged across stims.
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    merged = ExtractorResult.merge_stims(extractor.transform([apple, apple]))
    merged_columns = set(merged.columns) - {'stim_name'}
    assert merged_columns == expected_features | meta_columns
    assert merged['content_filtering'][0] < 0.2

    # Single image converted straight to a data frame.
    obama = ImageStim(join(image_dir, 'obama.jpg'))
    single = extractor.transform(obama).to_df()
    assert set(single.columns) == expected_features
    assert single['fer_Happy'][0] > 0.7
def _extract(self, stim):
    """Return the mean variance of ``stim.data`` along axis 2."""
    per_pixel_variance = np.var(stim.data, axis=2)
    vibrance = per_pixel_variance.mean()
    return ExtractorResult(np.array([[vibrance]]), stim, self,
                           features=['vibrance'])
def _extract(self, stim):
    """Look up ``stim.text`` in ``self.data`` and return its row.

    Texts missing from the index get ``self.missing`` for every variable
    in ``self.variables``; missing cells in a found row are filled with
    ``self.missing`` as well.
    """
    if stim.text in self.data.index:
        row = self.data.loc[stim.text].fillna(self.missing)
    else:
        row = pd.Series(self.missing, self.variables)

    lookup = row.to_dict()
    names = list(lookup.keys())
    values = np.array([list(lookup.values())])
    return ExtractorResult(values, stim, self, features=names)
def _extract(self, collection_id, **kwargs):
    """Store ``collection_id`` and wrap the output of ``self._get_values``.

    NOTE(review): the same ``values`` object is passed both as the data
    and as ``features``, and ``collection_id`` takes the place of the
    stim -- confirm this is what ExtractorResult expects.
    """
    self.collection_id = collection_id
    retrieved = self._get_values()
    return ExtractorResult(retrieved, collection_id, self,
                           features=retrieved)
def _extract(self, stim):
    """Emit the constant value 1 for each one-unit step of the stim.

    Onsets run from 0 up to (but excluding) ``stim.duration`` in steps of
    1, each with a duration of 1.
    """
    onsets = np.arange(0., stim.duration, 1.)
    n_bins = len(onsets)
    constants = np.array([1] * n_bins)
    return ExtractorResult(constants, stim, self, features=['constant'],
                           onsets=onsets, durations=[1.] * n_bins)
def _extract(self, stim):
    """Apply ``self.func`` to ``stim.data`` and wrap the output.

    The function output is passed both as the result data and as the
    ``raw`` argument of the ExtractorResult.
    """
    computed = self.func(stim.data)
    names = listify(self.get_feature_names())
    return ExtractorResult(computed, stim, self, features=names,
                           raw=computed)
def _extract(self, stim):
    """Return the mean of the per-pixel maximum along axis 2, over 255."""
    per_pixel_max = np.max(stim.data, axis=2)
    brightness = per_pixel_max.mean() / 255.0
    return ExtractorResult(np.array([[brightness]]), stim, self,
                           features=['brightness'])
def _extract(self, stims):
    """Score a batch of stims with every model in ``self.models``.

    Each model is called once on the tokens from ``self._get_tokens``;
    the i-th result collects the i-th score of every model.
    """
    tokens = self._get_tokens(stims)
    per_model_scores = [model(tokens) for model in self.models]
    return [
        ExtractorResult([scores[idx] for scores in per_model_scores],
                        stim, self)
        for idx, stim in enumerate(stims)
    ]
def test_merge_extractor_results_by_features():
    """Check merging of results from several extractors on one stim."""
    np.random.seed(100)
    image_path = join(get_test_data_path(), 'image', 'apple.jpg')
    stim = ImageStim(image_path)

    # Merge results for static Stims (no onsets); only checks that the
    # merge runs, the frame itself is not inspected.
    static_extractors = [BrightnessExtractor(), VibranceExtractor()]
    static_results = [ext.transform(stim) for ext in static_extractors]
    ExtractorResult.merge_features(static_results)

    dummy = DummyExtractor()
    dummy_names = ['Extractor1', 'Extractor2', 'Extractor3']
    dummy_results = [dummy.transform(stim, name) for name in dummy_names]
    merged = ExtractorResult.merge_features(dummy_results)

    assert merged.shape == (177, 14)
    inner_levels = merged.columns.levels[1].unique().tolist()
    assert inner_levels == ['duration', 0, 1, 2, '']
    extra_columns = ['onset', 'class', 'filename', 'history', 'stim']
    outer_levels = merged.columns.levels[0].unique().tolist()
    assert outer_levels == dummy_names + extra_columns
def _extract(self, stims):
    """Score a batch of stims with every model in ``self.models``.

    Each model is called once on the non-None stim data; the i-th result
    collects the i-th score of every model.

    NOTE(review): stims whose data is None are dropped from the model
    input but still indexed positionally -- confirm callers never pass
    such stims.
    """
    tokens = [stim.data for stim in stims if stim.data is not None]
    per_model_scores = [model(tokens) for model in self.models]
    return [
        ExtractorResult([scores[idx] for scores in per_model_scores],
                        stim, self)
        for idx, stim in enumerate(stims)
    ]
def test_merge_extractor_results_by_stims():
    """Check merging of one extractor's results across two stims."""
    image_dir = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    obama = ImageStim(join(image_dir, 'obama.jpg'))

    dummy = DummyExtractor()
    per_stim_results = [dummy.transform(apple), dummy.transform(obama)]
    merged = ExtractorResult.merge_stims(per_stim_results)

    assert merged.shape == (200, 6)
    expected_columns = {'onset', 'duration', 0, 1, 2, 'stim'}
    assert set(merged.columns.tolist()) == expected_columns
    assert set(merged['stim'].unique()) == {'obama.jpg', 'apple.jpg'}
def _extract(self, stims):
    """Vectorize the text of a batch of stims.

    ``self.vectorizer`` is fitted on the texts of all stims, and each
    stim receives its own row of the resulting matrix.

    Args:
        stims: Indexable collection of stims exposing a ``text``
            attribute.

    Returns:
        List with one ExtractorResult per stim, whose features are the
        vectorizer's feature names.
    """
    mat = self.vectorizer.fit_transform([s.text for s in stims]).toarray()

    # scikit-learn replaced get_feature_names() with
    # get_feature_names_out(); prefer the newer method and fall back to
    # the older one for vectorizers that only provide it.
    get_names = getattr(self.vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = self.vectorizer.get_feature_names
    # The names are the same for every row, so fetch them once rather
    # than once per stim.
    feature_names = list(get_names())

    results = []
    for i, row in enumerate(mat):
        # Each result gets its own copy of the name list.
        results.append(
            ExtractorResult([row], stims[i], self,
                            features=list(feature_names)))
    return results
def _extract(self, stim):
    """Wrap the output of ``self._get_values(stim)`` in a result.

    ``self.extractor_type`` defaults to ``'detect_face'`` when unset.

    NOTE(review): the same ``values`` object is passed both as the data
    and as ``features`` -- confirm this is what ExtractorResult expects.
    """
    if self.extractor_type is None:
        self.extractor_type = 'detect_face'
    retrieved = self._get_values(stim)
    return ExtractorResult(retrieved, stim, self, features=retrieved)
def _extract(self, stims):
    """Query the API for a batch of stims, one result per response.

    Responses holding ``self.response_object`` are parsed into features
    and values; responses with neither that object nor an error produce
    an empty result.

    Raises:
        Exception: with the API's message when a response carries an
            ``'error'`` entry.
    """
    responses = self._query_api(self._build_request(stims))

    results = []
    for idx, response in enumerate(responses):
        if response and self.response_object in response:
            features, values = self._parse_annotations(
                response[self.response_object])
            result = ExtractorResult([values], stims[idx], self,
                                     features=features)
        elif 'error' in response:
            raise Exception(response['error']['message'])
        else:
            result = ExtractorResult([[]], stims[idx], self, features=[])
        results.append(result)
    return results
def _extract(self, stim):
    """Tag the stim's image file with ``self.tagger``.

    The file is opened in binary mode and passed to ``tag_images``; the
    first result's classes become features and its probabilities the
    data row.
    """
    with stim.get_filename() as filename, open(filename, 'rb') as handle:
        response = self.tagger.tag_images(
            handle, select_classes=self.select_classes)

    tag_info = response['results'][0]['result']['tag']
    return ExtractorResult([tag_info['probs']], stim, self,
                           features=tag_info['classes'])
def _extract(self, stim):
    """Return the peak absolute Laplacian of the grayscale image, over 255."""
    verify_dependencies(['cv2'])
    # Taken from
    # http://stackoverflow.com/questions/7765810/is-there-a-way-to-detect-if-an-image-is-blurry?lq=1
    grayscale = cv2.cvtColor(stim.data, cv2.COLOR_BGR2GRAY)
    # Second argument is the output depth (ddepth) of the Laplacian.
    laplacian = cv2.Laplacian(grayscale, 3)
    magnitude = cv2.convertScaleAbs(laplacian)
    sharpness = np.max(magnitude) / 255.0
    return ExtractorResult(np.array([[sharpness]]), stim, self,
                           features=['sharpness'])
def _extract(self, stim, collection_id, face_match_threshold=None,
             max_faces=None, **kwargs):
    """Store the search settings and wrap ``self._get_values(stim)``.

    ``collection_id``, ``face_match_threshold`` and ``max_faces`` are
    saved on the instance before the values are fetched.

    NOTE(review): the same ``values`` object is passed both as the data
    and as ``features`` -- confirm this is what ExtractorResult expects.
    """
    self.collection_id = collection_id
    self.face_match_threshold = face_match_threshold
    self.max_faces = max_faces
    retrieved = self._get_values(stim)
    return ExtractorResult(retrieved, stim, self, features=retrieved)
def _extract(self, collection_id, face_id, max_faces=None, **kwargs):
    """Store the search settings and wrap ``self._get_values()``.

    ``collection_id``, ``face_id`` and ``max_faces`` are saved on the
    instance before the values are fetched.

    NOTE(review): the same ``values`` object is passed both as the data
    and as ``features``, and ``face_id`` takes the place of the stim --
    confirm this is what ExtractorResult expects.
    """
    self.collection_id = collection_id
    self.face_id = face_id
    self.max_faces = max_faces
    retrieved = self._get_values()
    return ExtractorResult(retrieved, face_id, self, features=retrieved)