Example #1
def test_indico_api_text_extractor():

    ext = IndicoAPITextExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                 models=['emotion', 'personality'])

    # With ComplexTextStim input
    srtfile = join(get_test_data_path(), 'text', 'wonderful.srt')
    srt_stim = ComplexTextStim(srtfile, onset=4.2)
    result = ExtractorResult.merge_stims(ext.transform(srt_stim))
    outdfKeysCheck = {
        'onset',
        'duration',
        'emotion_anger',
        'emotion_fear',
        'emotion_joy',
        'emotion_sadness',
        'emotion_surprise',
        'personality_openness',
        'personality_extraversion',
        'personality_agreeableness',
        'personality_conscientiousness'}
    meta_columns = {'source_file',
                    'history',
                    'class',
                    'filename'}
    assert set(result.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result['onset'][1] == 92.622

    # With TextStim input
    ts = TextStim(text="It's a wonderful life.")
    result = ext.transform(ts).to_df()
    assert set(result.columns) == outdfKeysCheck
    assert len(result) == 1
Example #2
    def _extract(self, stim):
        data = self._stft(stim)
        events = []
        time_bins = np.arange(0., stim.duration - self.frame_size,
                              self.hop_size)

        if isinstance(self.freq_bins, int):
            bins = []
            bin_size = data.shape[1] // self.freq_bins  # integer division, so bin edges work as slice indices
            for i in range(self.freq_bins):
                bins.append((i * bin_size, (i + 1) * bin_size))
            self.freq_bins = bins

        features = ['%d_%d' % fb for fb in self.freq_bins]
        index = list(time_bins)
        values = np.zeros((len(index), len(features)))
        for i, fb in enumerate(self.freq_bins):
            start, stop = fb
            values[:, i] = data[:, start:stop].mean(1)
        values = np.nan_to_num(values)
        return ExtractorResult(values,
                               stim,
                               self,
                               features=features,
                               onsets=index,
                               durations=self.hop_size)
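A minimal sketch of the frequency-binning step above, assuming an STFT array with 10 frequency columns split into 5 equal bins (the numbers are illustrative):

import numpy as np

# Split 10 frequency columns into 5 equal (start, stop) index pairs,
# mirroring the loop that builds self.freq_bins above.
n_columns, n_bins = 10, 5
bin_size = n_columns // n_bins
bins = [(i * bin_size, (i + 1) * bin_size) for i in range(n_bins)]
print(bins)  # [(0, 2), (2, 4), (4, 6), (6, 8), (8, 10)]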
Example #3
    def _extract(self, stim):
        required_shape = (299, 299, 3)
        x = stim.data
        if x.ndim != 3:
            raise ValueError("Stim data must have rank 3 but got rank {}".format(x.ndim))
        if x.shape != required_shape:
            x = _resize_image(x, required_shape[:-1])
        # Add batch dimension.
        x = x[None]
        # Preprocess input as Inception V3 expects (pixels scaled to [-1, 1]).
        x = tf.keras.applications.inception_v3.preprocess_input(x)
        preds = self.model.predict(x, batch_size=1)

        # This produces a nested list. There is one sub-list per sample in the
        # batch, and each sub-list contains `self.num_predictions` tuples with
        # `(ID, human-readable-label, probability)`.
        decoded = tf.keras.applications.inception_v3.decode_predictions(
            preds, top=self.num_predictions)

        # We assume there is only one sample in the batch.
        decoded = decoded[0]
        values = [t[2] for t in decoded]
        features = [t[1] for t in decoded]

        return ExtractorResult([values], stim, self, features=features)
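A quick check of the preprocessing step above: tf.keras.applications.inception_v3.preprocess_input rescales pixel values from [0, 255] to [-1, 1], the range the pretrained Inception V3 weights expect (the test array is illustrative):

import numpy as np
import tensorflow as tf

# 0 maps to -1, 127.5 to 0, and 255 to 1.
x = np.array([[[[0., 127.5, 255.]]]], dtype='float32')
print(tf.keras.applications.inception_v3.preprocess_input(x))
# [[[[-1.  0.  1.]]]]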
Example #4
    def _extract(self, stim):
        from pliers.external import pySaliencyMap
        # pySaliencyMap from https://github.com/akisato-/pySaliencyMap
        data = stim.data

        # Initialize variables
        h, w, c = stim.data.shape
        sm = pySaliencyMap.pySaliencyMap(h, w)

        # Compute saliency maps and store full maps as derivatives
        stim.derivatives = dict()
        stim.derivatives['saliency_map'] = sm.SMGetSM(stim.data)
        stim.derivatives['binarized_map'] = sm.SMGetBinarizedSM(
            stim.data)  # thresholding done using Otsu's method

        # Compute summary statistics
        output = {}
        output['max_saliency'] = np.max(stim.derivatives['saliency_map'])
        output['max_y'], output['max_x'] = [
            list(i)[0] for i in np.where(
                stim.derivatives['saliency_map'] == output['max_saliency'])
        ]
        output['frac_high_saliency'] = np.sum(
            stim.derivatives['binarized_map'] / 255.0) / (h * w)

        return ExtractorResult(np.array([list(output.values())]),
                               stim,
                               self,
                               features=list(output.keys()))
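A small sketch of the frac_high_saliency computation above: the binarized map holds 0/255 values, so dividing by 255 and summing counts the salient pixels (values are synthetic):

import numpy as np

binarized = np.array([[0, 255], [255, 255]], dtype=float)
frac = np.sum(binarized / 255.0) / binarized.size
print(frac)  # 0.75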
Example #5
    def _extract(self, stim):

        amps = stim.audio.data
        sampling_rate = stim.audio.sampling_rate
        elements = stim.complex_text.elements
        values, onsets, durations = [], [], []

        for i, el in enumerate(elements):
            onset = sampling_rate * el.onset
            onsets.append(onset)
            duration = sampling_rate * el.duration
            durations.append(duration)

            r_onset = np.round(onset).astype(int)
            r_offset = np.round(onset + duration).astype(int)
            if r_offset > amps.shape[0]:
                raise Exception('Block ends after data.')

            mean_amplitude = np.mean(amps[r_onset:r_offset])
            values.append(mean_amplitude)

        return ExtractorResult(values,
                               stim,
                               self,
                               features=['mean_amplitude'],
                               onsets=onsets,
                               durations=durations)
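A toy walkthrough of the windowing logic above, with made-up numbers: element onsets and durations are given in seconds, so multiplying by the sampling rate converts them to sample indices before averaging.

import numpy as np

amps = np.arange(10, dtype=float)  # stand-in audio samples
sampling_rate = 2                  # 2 Hz keeps the arithmetic obvious
onset, duration = 1.0, 2.0         # seconds
r_onset = int(round(sampling_rate * onset))                # 2
r_offset = int(round(sampling_rate * (onset + duration)))  # 6
print(np.mean(amps[r_onset:r_offset]))  # mean of [2, 3, 4, 5] -> 3.5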
Example #6
    def _extract(self, stim):
        data = self._stft(stim)
        time_bins = np.arange(0., stim.duration - self.frame_size,
                              self.hop_size)

        if isinstance(self.freq_bins, int):
            bins = []
            bin_size = int(data.shape[1] / self.freq_bins)
            for i in range(self.freq_bins):
                if i == self.freq_bins - 1:
                    bins.append((i * bin_size, data.shape[1]))
                else:
                    bins.append((i * bin_size, (i + 1) * bin_size))
            self.freq_bins = bins

        features = ['%d_%d' % fb for fb in self.freq_bins]
        offset = 0.0 if stim.onset is None else stim.onset
        index = [tb + offset for tb in time_bins]
        values = np.zeros((len(index), len(features)))
        for i, fb in enumerate(self.freq_bins):
            start, stop = fb
            values[:, i] = data[:, start:stop].mean(1)
        values[np.isnan(values)] = 0.
        values[np.isinf(values)] = 0.
        return ExtractorResult(values,
                               stim,
                               self,
                               features=features,
                               onsets=index,
                               durations=self.hop_size,
                               orders=list(range(len(index))))
Example #7
    def _extract(self, stim):
        if isinstance(stim, ImageStim):
            stim = [stim]

        request = self._build_request(stim)
        responses = self._query_api(request)

        features = []
        data = []
        for i, response in enumerate(responses):
            if response and self.response_object in response:
                annotations = response[self.response_object]
                feat, values = self._parse_annotations(annotations)
                features += feat
                data += values
            elif 'error' in response:
                raise Exception(response['error']['message'])

        data = [data]
        onsets = [
            stim[i].onset if hasattr(stim[i], 'onset') else i
            for i in range(len(responses))
        ]
        durations = [stim[i].duration for i in range(len(responses))]
        return ExtractorResult(data,
                               stim,
                               self,
                               features=features,
                               onsets=onsets,
                               durations=durations)
Example #8
    def _extract(self, stim):

        values = self._get_values(stim)

        if self._feature == 'beat_track':
            beats = np.array(values[1])
            values = beats

        values = values.T
        n_frames = len(values)

        feature_names = listify(self.get_feature_names())

        onsets = librosa.frames_to_time(range(n_frames),
                                        sr=stim.sampling_rate,
                                        hop_length=self.hop_length)

        onsets = onsets + stim.onset if stim.onset else onsets

        durations = [self.hop_length / float(stim.sampling_rate)] * n_frames

        return ExtractorResult(values,
                               stim,
                               self,
                               features=feature_names,
                               onsets=onsets,
                               durations=durations,
                               orders=list(range(n_frames)))
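The onset computation above leans on librosa's frame-to-time conversion; a minimal sketch using librosa's default parameters (sr=22050 and hop_length=512 are illustrative here):

import librosa

# Frame k starts at k * hop_length / sr seconds.
print(librosa.frames_to_time(range(4), sr=22050, hop_length=512))
# [0.         0.02321995 0.04643991 0.06965986]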
Example #9
    def _extract(self, stim):

        flows = []
        onsets = []
        durations = []
        for i, f in enumerate(stim):

            img = f.data
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            if i == 0:
                last_frame = img

            flow = cv2.calcOpticalFlowFarneback(last_frame, img, None, 0.5, 3,
                                                15, 3, 5, 1.2, 0)
            flow = np.sqrt((flow**2).sum(2))

            if self.show:
                cv2.imshow('frame', flow.astype('int8'))
                cv2.waitKey(1)

            last_frame = img
            flows.append(flow.sum())
            onsets.append(f.onset)
            durations.append(f.duration)

        return ExtractorResult(flows,
                               stim,
                               self,
                               features=['total_flow'],
                               onsets=onsets,
                               durations=durations)
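A sketch of the magnitude step above: cv2.calcOpticalFlowFarneback returns an (H, W, 2) field of per-pixel (dx, dy) displacements, and np.sqrt((flow**2).sum(2)) collapses it to per-pixel magnitudes (the values below are synthetic):

import numpy as np

flow = np.zeros((4, 4, 2))
flow[..., 0] = 3.0  # dx
flow[..., 1] = 4.0  # dy
magnitude = np.sqrt((flow ** 2).sum(2))
print(magnitude[0, 0], magnitude.sum())  # 5.0 80.0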
Example #10
    def _extract(self, stim):
        scores = self.analyzer.polarity_scores(stim.text)
        features = ['sentiment_' + k for k in scores.keys()]
        return ExtractorResult([list(scores.values())],
                               stim,
                               self,
                               features=features)
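This method matches pliers' VADER-based sentiment extractor; assuming the class is VADERSentimentExtractor, a usage sketch might look like this (column names follow VADER's standard neg/neu/pos/compound keys):

from pliers.stimuli import TextStim
from pliers.extractors import VADERSentimentExtractor

stim = TextStim(text="It's a wonderful life.")
df = VADERSentimentExtractor().transform(stim).to_df()
# Expect columns like sentiment_neg, sentiment_neu,
# sentiment_pos, and sentiment_compound.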
Example #11
    def _extract(self, stims):
        tokens = [stim.data for stim in stims if stim.data is not None]
        scores = [model(tokens) for model in self.models]

        results = []
        for i, stim in enumerate(stims):
            features, data = [], []
            for j, score in enumerate(scores):
                if isinstance(score[i], float):
                    features.append(self.names[j])
                    data.append(score[i])
                elif isinstance(score[i], dict):
                    for k in score[i].keys():
                        features.append(self.names[j] + '_' + k)
                        data.append(score[i][k])

            results.append(
                ExtractorResult([data],
                                stim,
                                self,
                                features=features,
                                onsets=stim.onset,
                                durations=stim.duration))

        return results
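A minimal sketch of the score-flattening above: each model score may be a plain float or a dict, and dict scores expand into one feature per key (names and values are made up):

names = ['sentiment']
score = {'pos': 0.9, 'neg': 0.1}
features = [names[0] + '_' + k for k in score]
data = list(score.values())
print(features, data)  # ['sentiment_pos', 'sentiment_neg'] [0.9, 0.1]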
Example #12
    def _extract(self, stims):
        verify_dependencies(['clarifai_client'])
        moc = clarifai_client.ModelOutputConfig(
            min_value=self.min_value,
            max_concepts=self.max_concepts,
            select_concepts=self.select_concepts)
        output_config = moc
        model_output_info = clarifai_client.ModelOutputInfo(
            output_config=output_config)

        # ExitStack lets us use filename context managers simultaneously
        with ExitStack() as stack:
            files = [stack.enter_context(s.get_filename()) for s in stims]
            imgs = [
                clarifai_client.Image(filename=filename) for filename in files
            ]
            tags = self.model.predict(imgs,
                                      model_output_info=model_output_info)

        extracted = []
        for i, res in enumerate(tags['outputs']):
            data = res['data']['concepts']
            concepts = []
            values = []
            for d in data:
                concepts.append(d['name'])
                values.append(d['value'])
            extracted.append(
                ExtractorResult([values], stims[i], self, features=concepts))

        return extracted
Example #13
def test_indico_api_image_extractor():

    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])

    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    result1 = ExtractorResult.merge_stims(ext.transform([stim1, stim1]))

    outdfKeysCheck = {
        'onset',
        'duration',
        'fer_Surprise',
        'fer_Neutral',
        'fer_Sad',
        'fer_Happy',
        'fer_Angry',
        'fer_Fear',
        'content_filtering'}
    meta_columns = {'source_file',
                    'history',
                    'class',
                    'filename'}

    assert set(result1.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result1['content_filtering'][0] < 0.2

    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    result2 = ext.transform(stim2).to_df()
    assert set(result2.columns) == outdfKeysCheck
    assert result2['fer_Happy'][0] > 0.7
Example #14
    def _extract(self, stim):
        data = stim.data
        vibrance = np.var(data, 2).mean()
        return ExtractorResult(np.array([[vibrance]]),
                               stim,
                               self,
                               features=['vibrance'])
Example #15
    def _extract(self, stim):
        if stim.text not in self.data.index:
            vals = pd.Series(self.missing, self.variables)
        else:
            vals = self.data.loc[stim.text].fillna(self.missing)
        vals = vals.to_dict()
        return ExtractorResult(np.array([list(vals.values())]), stim, self,
                               features=list(vals.keys()))
Example #16
    def _extract(self, collection_id, **kwargs):
        self.collection_id = collection_id

        values = self._get_values()

        return ExtractorResult(values, collection_id, self,
                               features=values)
Example #17
    def _extract(self, stim):
        time_bins = np.arange(0., stim.duration, 1.)
        return ExtractorResult(np.array([1] * len(time_bins)),
                               stim,
                               self,
                               features=['constant'],
                               onsets=time_bins,
                               durations=[1.] * len(time_bins))
Example #18
    def _extract(self, stim):
        values = self.func(stim.data)
        feature_names = listify(self.get_feature_names())
        return ExtractorResult(values,
                               stim,
                               self,
                               features=feature_names,
                               raw=values)
Example #19
    def _extract(self, stim):
        data = stim.data
        brightness = np.amax(data, 2).mean() / 255.0

        return ExtractorResult(np.array([[brightness]]),
                               stim,
                               self,
                               features=['brightness'])
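This is pliers' brightness extractor pattern; a usage sketch, assuming the class is BrightnessExtractor and the image path exists:

from pliers.stimuli import ImageStim
from pliers.extractors import BrightnessExtractor

stim = ImageStim('apple.jpg')  # illustrative path
df = BrightnessExtractor().transform(stim).to_df()
print(df['brightness'][0])  # a float in [0, 1]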
Example #20
    def _extract(self, stims):
        tokens = self._get_tokens(stims)
        scores = [model(tokens) for model in self.models]

        results = []
        for i, stim in enumerate(stims):
            stim_scores = [s[i] for s in scores]
            results.append(ExtractorResult(stim_scores, stim, self))
        return results
Example #21
def test_merge_extractor_results_by_features():
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim = ImageStim(join(image_dir, 'apple.jpg'))

    # Merge results for static Stims (no onsets)
    extractors = [BrightnessExtractor(), VibranceExtractor()]
    results = [e.transform(stim) for e in extractors]
    df = ExtractorResult.merge_features(results)

    de = DummyExtractor()
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    results = [de.transform(stim, name) for name in de_names]
    df = ExtractorResult.merge_features(results)
    assert df.shape == (177, 14)
    assert df.columns.levels[1].unique().tolist() == ['duration', 0, 1, 2, '']
    cols = ['onset', 'class', 'filename', 'history', 'stim']
    assert df.columns.levels[0].unique().tolist() == de_names + cols
Example #22
    def _extract(self, stims):
        tokens = [stim.data for stim in stims if stim.data is not None]
        scores = [model(tokens) for model in self.models]

        results = []
        for i, stim in enumerate(stims):
            stim_scores = [s[i] for s in scores]
            results.append(ExtractorResult(stim_scores, stim, self))
        return results
Example #23
def test_merge_extractor_results_by_stims():
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de = DummyExtractor()
    results = [de.transform(stim1), de.transform(stim2)]
    df = ExtractorResult.merge_stims(results)
    assert df.shape == (200, 6)
    assert set(df.columns.tolist()) == set(['onset', 'duration', 0, 1, 2, 'stim'])
    assert set(df['stim'].unique()) == set(['obama.jpg', 'apple.jpg'])
Example #24
    def _extract(self, stims):
        mat = self.vectorizer.fit_transform([s.text for s in stims]).toarray()
        results = []
        for i, row in enumerate(mat):
            results.append(
                ExtractorResult([row],
                                stims[i],
                                self,
                                features=self.vectorizer.get_feature_names()))
        return results
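The vectorizer step above follows the scikit-learn API; a standalone sketch (note that get_feature_names() was removed in scikit-learn 1.2 in favor of get_feature_names_out()):

from sklearn.feature_extraction.text import CountVectorizer

texts = ['the quick brown fox', 'the lazy dog']
vectorizer = CountVectorizer()
mat = vectorizer.fit_transform(texts).toarray()
print(vectorizer.get_feature_names_out())
# ['brown' 'dog' 'fox' 'lazy' 'quick' 'the']
print(mat.shape)  # (2, 6)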
Example #25
    def _extract(self, stim):

        if self.extractor_type is None:
            self.extractor_type = 'detect_face'

        values = self._get_values(stim)

        return ExtractorResult(values, stim, self,
                               features=values)
Example #26
    def _extract(self, stims):
        request = self._build_request(stims)
        responses = self._query_api(request)

        results = []
        for i, response in enumerate(responses):
            if response and self.response_object in response:
                annotations = response[self.response_object]
                features, values = self._parse_annotations(annotations)
                values = [values]
                results.append(
                    ExtractorResult(values, stims[i], self, features=features))
            elif 'error' in response:
                raise Exception(response['error']['message'])
            else:
                results.append(
                    ExtractorResult([[]], stims[i], self, features=[]))

        return results
Example #27
    def _extract(self, stim):
        with stim.get_filename() as filename:
            with open(filename, 'rb') as f:
                tags = self.tagger.tag_images(
                    f, select_classes=self.select_classes)

        tagged = tags['results'][0]['result']['tag']
        return ExtractorResult([tagged['probs']],
                               stim,
                               self,
                               features=tagged['classes'])
Example #28
    def _extract(self, stim):
        verify_dependencies(['cv2'])
        # Taken from
        # http://stackoverflow.com/questions/7765810/is-there-a-way-to-detect-if-an-image-is-blurry?lq=1
        data = stim.data
        gray_image = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)

        sharpness = np.max(
            cv2.convertScaleAbs(cv2.Laplacian(gray_image, 3))) / 255.0
        return ExtractorResult(np.array([[sharpness]]), stim, self,
                               features=['sharpness'])
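A sanity check for the sharpness measure above: the score is the maximum absolute Laplacian response scaled to [0, 1], so a perfectly flat image scores 0 (the ddepth value 3 corresponds to cv2.CV_16S):

import cv2
import numpy as np

flat = np.full((8, 8), 128, dtype=np.uint8)
sharpness = np.max(cv2.convertScaleAbs(cv2.Laplacian(flat, 3))) / 255.0
print(sharpness)  # 0.0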
Example #29
    def _extract(self, stim, collection_id, face_match_threshold=None,
                 max_faces=None, **kwargs):

        self.collection_id = collection_id
        self.face_match_threshold = face_match_threshold
        self.max_faces = max_faces

        values = self._get_values(stim)

        return ExtractorResult(values, stim, self,
                               features=values)
Example #30
    def _extract(self, collection_id, face_id, max_faces=None, **kwargs):

        self.collection_id = collection_id
        self.face_id = face_id
        self.max_faces = max_faces

        values = self._get_values()

        return ExtractorResult(values, face_id, self,
                               features=values)