Example #1
def test_word_embedding_extractor():
    pytest.importorskip('gensim')
    stims = [TextStim(text='this'), TextStim(text='sentence')]
    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True)
    result = merge_results(ext.transform(stims), extractor_names='multi',
                           format='wide')
    assert ('WordEmbeddingExtractor', 'embedding_dim99') in result.columns
    assert np.allclose(0.0010911,
                       result[('WordEmbeddingExtractor', 'embedding_dim0')][0])

    unk = TextStim(text='nowaythisinvocab')
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] == 0.0

    ones = np.ones(100)
    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True, unk_vector=ones)
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] == 1.0

    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True, unk_vector='random')
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] <= 1.0
    assert result['embedding_dim10'][0] >= -1.0

    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True, unk_vector='nothing')
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] == 0.0
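Note: with format='wide' and extractor_names='multi', merge_results returns a DataFrame whose columns are two-level (extractor name, feature name) tuples, which is why the assertions above index columns as ('WordEmbeddingExtractor', 'embedding_dim0'). A minimal pandas-only sketch of that layout, with made-up values:

import numpy as np
import pandas as pd

# Two-level columns: (extractor name, feature name); values are illustrative
cols = pd.MultiIndex.from_tuples([
    ('WordEmbeddingExtractor', 'embedding_dim0'),
    ('WordEmbeddingExtractor', 'embedding_dim99'),
])
result = pd.DataFrame([[0.0010911, -0.02]], columns=cols)

# Column membership and lookup work exactly as in the test above
assert ('WordEmbeddingExtractor', 'embedding_dim99') in result.columns
assert np.allclose(0.0010911,
                   result[('WordEmbeddingExtractor', 'embedding_dim0')][0])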
Example #2
def test_indico_api_text_extractor():

    ext = IndicoAPITextExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                 models=['emotion', 'personality'])

    # With ComplexTextStim input
    srtfile = join(get_test_data_path(), 'text', 'wonderful.srt')
    srt_stim = ComplexTextStim(srtfile, onset=4.2)
    result = merge_results(ext.transform(srt_stim), extractor_names=False)
    outdfKeysCheck = {
        'onset', 'duration', 'order', 'object_id', 'emotion_anger',
        'emotion_fear', 'emotion_joy', 'emotion_sadness', 'emotion_surprise',
        'personality_openness', 'personality_extraversion',
        'personality_agreeableness', 'personality_conscientiousness'
    }
    meta_columns = {'source_file', 'history', 'class', 'filename'}
    assert set(result.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result['onset'][1] == 92.622

    # With TextStim input
    ts = TextStim(text="It's a wonderful life.")
    result = ext.transform(ts).to_df(object_id=True)
    assert set(result.columns) == outdfKeysCheck
    assert len(result) == 1
Example #3
def test_indico_api_image_extractor():
    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])
    stim1 = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    result1 = merge_results(ext.transform([stim1, stim1]),
                            extractor_names=False)
    outdfKeysCheck = {
        'object_id', 'fer_Surprise', 'fer_Neutral', 'fer_Sad', 'fer_Happy',
        'fer_Angry', 'fer_Fear', 'content_filtering'
    }
    meta_columns = {
        'source_file', 'history', 'class', 'filename', 'onset', 'duration',
        'order'
    }

    assert set(result1.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result1['content_filtering'][0] < 0.2

    stim2 = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    result2 = ext.transform(stim2).to_df(timing=False, object_id=True)
    assert set(result2.columns) == outdfKeysCheck
    assert result2['fer_Happy'][0] > 0.7

    url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
    stim = ImageStim(url=url)
    result = ext.transform(stim).to_df()
    assert result['fer_Neutral'][0] > 0.1
Example #4
def test_indico_api_image_extractor():
    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])

    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    result1 = merge_results(ext.transform([stim1, stim1]),
                            extractor_names=False)
    outdfKeysCheck = {
        'object_id', 'fer_Surprise', 'fer_Neutral', 'fer_Sad', 'fer_Happy',
        'fer_Angry', 'fer_Fear', 'content_filtering'
    }
    meta_columns = {
        'source_file', 'history', 'class', 'filename', 'onset', 'duration',
        'order'
    }

    assert set(result1.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result1['content_filtering'][0] < 0.2

    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    result2 = ext.transform(stim2).to_df(timing=False, object_id=True)
    assert set(result2.columns) == outdfKeysCheck
    assert result2['fer_Happy'][0] > 0.7
Example #5
def test_word_embedding_extractor():
    pytest.importorskip('gensim')
    stims = [TextStim(text='this'), TextStim(text='sentence')]
    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True)
    result = merge_results(ext.transform(stims),
                           extractor_names='multi',
                           format='wide')
    assert ('WordEmbeddingExtractor', 'embedding_dim99') in result.columns
    assert np.allclose(0.0010911,
                       result[('WordEmbeddingExtractor', 'embedding_dim0')][0])

    unk = TextStim(text='nowaythisinvocab')
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] == 0.0

    ones = np.ones(100)
    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True,
                                 unk_vector=ones)
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] == 1.0

    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True,
                                 unk_vector='random')
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] <= 1.0
    assert result['embedding_dim10'][0] >= -1.0

    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True,
                                 unk_vector='nothing')
    result = ext.transform(unk).to_df()
    assert result['embedding_dim10'][0] == 0.0
Example #6
def test_indico_api_image_extractor():
    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])
    stim1 = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    result1 = merge_results(ext.transform([stim1, stim1]),
                            extractor_names=False)
    outdfKeysCheck = {
        'object_id',
        'fer_Surprise',
        'fer_Neutral',
        'fer_Sad',
        'fer_Happy',
        'fer_Angry',
        'fer_Fear',
        'content_filtering'}
    meta_columns = {'source_file',
                    'history',
                    'class',
                    'filename',
                    'onset',
                    'duration',
                    'order'}

    assert set(result1.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result1['content_filtering'][0] < 0.2

    stim2 = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    result2 = ext.transform(stim2).to_df(timing=False, object_id=True)
    assert set(result2.columns) == outdfKeysCheck
    assert result2['fer_Happy'][0] > 0.7

    url = 'https://via.placeholder.com/350x150'
    stim = ImageStim(url=url)
    result = ext.transform(stim).to_df()
    assert result['fer_Neutral'][0] > 0.0
Example #7
def test_clarifai_api_extractor_batch():
    stim = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    stim2 = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    ext = ClarifaiAPIImageExtractor()
    results = ext.transform([stim, stim2])
    results = merge_results(results)
    assert results['ClarifaiAPIImageExtractor#apple'][0] > 0.5 or \
        results['ClarifaiAPIImageExtractor#apple'][1] > 0.5
Example #8
def test_word_embedding_extractor():
    pytest.importorskip('gensim')
    stims = [TextStim(text='this'), TextStim(text='sentence')]
    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True)
    result = merge_results(ext.transform(stims))
    assert ('WordEmbeddingExtractor', 'embedding_dim99') in result.columns
    assert np.isclose(0.001091,
                      result[('WordEmbeddingExtractor', 'embedding_dim0')][0],
                      atol=1e-5)
Example #9
def test_part_of_speech_extractor():
    import nltk
    nltk.download('tagsets')
    stim = ComplexTextStim(join(TEXT_DIR, 'complex_stim_with_header.txt'))
    result = merge_results(PartOfSpeechExtractor().transform(stim), extractor_names=False)
    assert result.shape == (4, 52)
    assert result['NN'].sum() == 1
    assert result['VBD'][3] == 1
Example #10
def test_tfhub_image_reshape():
    stim = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    stim2 = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    ext = TFHubImageExtractor(MNET_URL,
                              reshape_input=(224, 224, 3),
                              features='feature_vector')
    df = merge_results(ext.transform([stim, stim2]), extractor_names=False)
    assert df.shape[0] == 2
    assert all([len(v) == 1280 for v in df['feature_vector']])
Example #11
def test_clarifai_api_extractor_batch():
    image_dir = join(get_test_data_path(), 'image')
    stim = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    ext = ClarifaiAPIExtractor()
    results = ext.transform([stim, stim2])
    results = merge_results(results)
    assert results['ClarifaiAPIExtractor#apple'][0] > 0.5 or \
        results['ClarifaiAPIExtractor#apple'][1] > 0.5
Example #12
def test_clarifai_api_extractor_large():
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)

    ext = ClarifaiAPIExtractor()
    images = [ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))] * 2
    with pytest.raises(ValueError):
        merge_results(ext.transform(images))

    config.set_option('allow_large_jobs', True)
    results = merge_results(ext.transform(images))
    assert 'ClarifaiAPIExtractor#apple' in results.columns
    assert results.shape == (1, 29)  # 1 row, not 2: both stims are the same instance

    config.set_option('allow_large_jobs', default)
    config.set_option('large_job', default_large)
Example #13
def test_clarifai_api_extractor_large():
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)

    ext = ClarifaiAPIImageExtractor()
    images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
              ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
    with pytest.raises(ValueError):
        merge_results(ext.transform(images))

    config.set_option('allow_large_jobs', True)
    results = merge_results(ext.transform(images))
    assert 'ClarifaiAPIImageExtractor#apple' in results.columns
    assert results.shape == (2, 49)

    config.set_option('allow_large_jobs', default)
    config.set_option('large_job', default_large)
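Note: the tests above save and restore the config options manually, so a failing assertion in the middle would leak the modified settings into later tests. A sketch of the same pattern hardened with try/finally (the pliers config module used above is assumed):

from pliers import config

default = config.get_option('allow_large_jobs')
default_large = config.get_option('large_job')
try:
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    # ... run the transform/merge_results assertions here ...
finally:
    # Restore the saved defaults even if an assertion fails above
    config.set_option('allow_large_jobs', default)
    config.set_option('large_job', default_large)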
Example #14
def test_part_of_speech_extractor():
    import nltk
    nltk.download('tagsets')
    stim = ComplexTextStim(join(TEXT_DIR, 'complex_stim_with_header.txt'))
    result = merge_results(PartOfSpeechExtractor().transform(stim),
                           format='wide', extractor_names=False)
    assert result.shape == (4, 54)
    assert result['NN'].sum() == 1
    result = result.sort_values('onset')
    assert result['VBD'].iloc[3] == 1
Example #15
def test_tensor_flow_inception_v3_extractor():
    image_dir = join(get_test_data_path(), 'image')
    imgs = [join(image_dir, f) for f in ['apple.jpg', 'obama.jpg']]
    imgs = [ImageStim(im) for im in imgs]
    ext = TensorFlowInceptionV3Extractor()
    results = ext.transform(imgs)
    df = merge_results(results)
    assert len(df) == 2
    assert df.iloc[0][('TensorFlowInceptionV3Extractor', 'label_1')] == 'Granny Smith'
    assert df.iloc[1][('TensorFlowInceptionV3Extractor', 'score_2')] == '0.17326'
Example #16
def test_tfhub_text_transformer_tokens():
    cstim = ComplexTextStim(join(TEXT_DIR, 'wonderful.txt'))
    tkn_ext = TFHubTextExtractor(ELECTRA_URL,
                                 features='token_encodings',
                                 output_key='sequence_output',
                                 preprocessor_url_or_path=TOKENIZER_URL)
    tkn_df = merge_results(tkn_ext.transform(cstim.elements[:3]),
                           extractor_names=False)
    assert all(tkn_df['token_encodings'][i].shape == (128, 256)
               for i in range(tkn_df.shape[0]))
Example #17
def test_tfhub_text_one_feature():
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    cstim = ComplexTextStim(join(TEXT_DIR, 'wonderful.txt'))
    ext = TFHubTextExtractor(GNEWS_URL, output_key=None, features='embedding')
    df = merge_results(ext.transform(cstim), extractor_names=False)
    assert df.shape[0] == len(cstim.elements)
    true = hub.KerasLayer(GNEWS_URL)([cstim.elements[3].text])[0, 2].numpy()
    assert np.isclose(df['embedding'][3][2], true)
    with pytest.raises(ValueError) as err:
        TFHubTextExtractor(GNEWS_URL, output_key='key').transform(stim)
    assert 'not a dictionary' in str(err.value)
Example #18
def test_indico_api_extractor_large():
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)

    ext = IndicoAPIImageExtractor(models=['fer'])

    images = [ImageStim(join(IMAGE_DIR, 'apple.jpg'))] * 2
    with pytest.raises(ValueError):
        merge_results(ext.transform(images))

    config.set_option('allow_large_jobs', True)

    results = merge_results(ext.transform(images))
    assert 'IndicoAPIImageExtractor#fer_Neutral' in results.columns
    assert results.shape == (1, 15)  # 1 row, not 2: both stims are the same instance

    config.set_option('allow_large_jobs', default)
    config.set_option('large_job', default_large)
Example #19
def test_tensor_flow_inception_v3_extractor():
    imgs = [join(IMAGE_DIR, f) for f in ['apple.jpg', 'obama.jpg']]
    imgs = [ImageStim(im, onset=4.2, duration=1) for im in imgs]
    ext = TensorFlowInceptionV3Extractor()
    results = ext.transform(imgs)
    df = merge_results(results, format='wide', extractor_names='multi')
    assert len(df) == 2
    assert ('TensorFlowInceptionV3Extractor', 'Granny Smith') in df.columns
    assert 0.22610 in df[
        ('TensorFlowInceptionV3Extractor', 'Windsor tie')].values
    assert 4.2 in df[('onset', np.nan)].values
    assert 1 in df[('duration', np.nan)].values
Example #20
def test_indico_api_extractor_large():
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)

    ext = IndicoAPIImageExtractor(models=['fer'])

    images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
              ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
    with pytest.raises(ValueError):
        merge_results(ext.transform(images))

    config.set_option('allow_large_jobs', True)

    results = merge_results(ext.transform(images))
    assert 'IndicoAPIImageExtractor#fer_Neutral' in results.columns
    assert results.shape == (2, 15)

    config.set_option('allow_large_jobs', default)
    config.set_option('large_job', default_large)
Example #21
def test_merge_extractor_results():
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]

    df = merge_results(results, format='wide')
    assert df.shape == (200, 18)
    cols = [
        'onset', 'duration', 'order', 'class', 'filename', 'history',
        'stim_name', 'source_file'
    ]
    assert not set(cols) - set(df.columns)
    assert 'Extractor2#feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='drop')
    assert df.shape == (200, 12)
    assert not set(cols) - set(df.columns)
    assert 'feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (200, 18)
    _cols = [(c, np.nan) for c in cols]
    assert not set(_cols) - set(df.columns)
    assert ('Extractor2', 'feature_3') in df.columns

    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')

    df = merge_results(results, format='long', extractor_names='column')
    assert df.shape == (1800, 12)
    _cols = cols + ['feature', 'extractor', 'value']
    assert not set(_cols) - set(df.columns)
    row = df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'

    df = merge_results(results, format='long', extractor_names='drop')
    assert df.shape == (1800, 11)
    assert set(_cols) - set(df.columns) == {'extractor'}

    df = merge_results(results, format='long', extractor_names='prepend')
    assert df.shape == (1800, 11)
    row = df.iloc[523, :]
    assert row['feature'] == 'Extractor1#feature_2'
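Note: the format='wide' vs. format='long' distinction exercised above is roughly a pandas melt: wide keeps one column per feature, long keeps one (feature, value) pair per row, which is why the long shapes multiply the row count by the number of features. An illustrative pandas-only sketch on a hypothetical mini-frame:

import pandas as pd

wide = pd.DataFrame({'onset': [0.0, 1.0],
                     'feature_1': [10, 30],
                     'feature_2': [20, 40]})
long = wide.melt(id_vars=['onset'], var_name='feature', value_name='value')
print(long)
#    onset    feature  value
# 0    0.0  feature_1     10
# 1    1.0  feature_1     30
# 2    0.0  feature_2     20
# 3    1.0  feature_2     40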
Example #22
def test_merge_extractor_results():
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]

    df = merge_results(results, format='wide')
    assert df.shape == (200, 18)
    cols = ['onset', 'duration', 'order', 'class', 'filename', 'history',
            'stim_name', 'source_file']
    assert not set(cols) - set(df.columns)
    assert 'Extractor2#feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='drop')
    assert df.shape == (200, 12)
    assert not set(cols) - set(df.columns)
    assert 'feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (200, 18)
    _cols = [(c, np.nan) for c in cols]
    assert not set(_cols) - set(df.columns)
    assert ('Extractor2', 'feature_3') in df.columns

    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')

    df = merge_results(results, format='long', extractor_names='column')
    assert df.shape == (1800, 12)
    _cols = cols + ['feature', 'extractor', 'value']
    assert not set(_cols) - set(df.columns)
    row = df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'

    df = merge_results(results, format='long', extractor_names='drop')
    assert df.shape == (1800, 11)
    assert set(_cols) - set(df.columns) == {'extractor'}

    df = merge_results(results, format='long', extractor_names='prepend')
    assert df.shape == (1800, 11)
    row = df.iloc[523, :]
    assert row['feature'] == 'Extractor1#feature_2'
Example #23
def test_tensor_flow_inception_v3_extractor():
    image_dir = join(get_test_data_path(), 'image')
    imgs = [join(image_dir, f) for f in ['apple.jpg', 'obama.jpg']]
    imgs = [ImageStim(im, onset=4.2, duration=1) for im in imgs]
    ext = TensorFlowInceptionV3Extractor()
    results = ext.transform(imgs)
    df = merge_results(results)
    assert len(df) == 2
    assert 'Granny Smith' in df[('TensorFlowInceptionV3Extractor',
                                 'label_1')].values
    assert '0.22610' in df[('TensorFlowInceptionV3Extractor',
                            'score_2')].values
    assert 4.2 in df[('onset', '')].values
    assert 1 in df[('duration', '')].values
Example #24
def test_tensorflow_keras_inception_v3_extractor():
    imgs = [join(IMAGE_DIR, f) for f in ['apple.jpg', 'obama.jpg']]
    imgs = [ImageStim(im, onset=4.2, duration=1) for im in imgs]
    ext = TensorFlowKerasInceptionV3Extractor()
    results = ext.transform(imgs)
    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (2, 19)
    true = 0.9737075
    pred = df['TensorFlowKerasInceptionV3Extractor'].loc[0, 'Granny_Smith']
    assert np.isclose(true, pred, 1e-05)
    true = 0.64234024
    pred = df['TensorFlowKerasInceptionV3Extractor'].loc[1, 'Windsor_tie']
    assert np.isclose(true, pred, 1e-05)
    assert 4.2 in df[('onset', np.nan)].values
    assert 1 in df[('duration', np.nan)].values
Example #25
def test_merge_extractor_results():
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de = DummyExtractor()
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    results = [de.transform(stim1, name) for name in de_names]
    results += [de.transform(stim2, name) for name in de_names]
    df = merge_results(results)
    assert df.shape == (355, 14)
    cols = ['onset', 'class', 'filename', 'history', 'stim']
    assert df.columns.levels[0].unique().tolist() == de_names + cols
    assert df.columns.levels[1].unique().tolist() == ['duration', 0, 1, 2, '']
    assert set(df['stim'].unique()) == set(['obama.jpg', 'apple.jpg'])
Example #26
    def run(self, stim, merge=True, **merge_kwargs):
        ''' Executes the graph by calling all Transformers in sequence.

        Args:
            stim (str, Stim, list): One or more valid inputs to any
                Transformer's 'transform' call.
            merge (bool): If True, all results are merged into a single pandas
                DataFrame before being returned. If False, a list of
                ExtractorResult objects is returned (one per Extractor/Stim
                combination).
            merge_kwargs: Optional keyword arguments to pass onto the
                merge_results() call.
        '''
        results = list(chain(*[self.run_node(n, stim) for n in self.roots]))
        results = list(flatten(results))
        self._results = results  # For use in plotting
        return merge_results(results, **merge_kwargs) if merge else results
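Note: given the docstring above, a minimal usage sketch of Graph.run (the JSON graph spec and stimulus path are hypothetical):

from pliers.graph import Graph

g = Graph(spec='my_graph.json')  # hypothetical JSON graph spec

# Merged into one DataFrame; extra kwargs are forwarded to merge_results()
df = g.run('stimulus.jpg', format='long', extractor_names='column')

# Raw ExtractorResult objects, one per Extractor/Stim combination
results = g.run('stimulus.jpg', merge=False)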
Example #27
def test_merge_extractor_results_flattened():
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]
    df = merge_results(results, flatten_columns=True)
    de_cols = ['Extractor1_0', 'Extractor1_1', 'Extractor1_2',
               'Extractor2_0', 'Extractor2_1', 'Extractor2_2',
               'Extractor3_0', 'Extractor3_1', 'Extractor3_2']
    assert df.shape == (354, 16)
    cols = ['onset', 'class', 'filename', 'history', 'stim_name', 'duration',
            'source_file']
    assert set(df.columns.unique().tolist()) == set(cols + de_cols)
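Note: flatten_columns=True collapses the two column levels into underscore-joined names like 'Extractor1_0' above. A pandas-only sketch of the equivalent flattening on a hypothetical multi-index frame:

import pandas as pd

cols = pd.MultiIndex.from_tuples([('Extractor1', 0), ('onset', '')])
df = pd.DataFrame([[0.5, 4.2]], columns=cols)

# Join non-empty levels with '_': ('Extractor1', 0) -> 'Extractor1_0',
# ('onset', '') -> 'onset'
df.columns = ['_'.join(str(level) for level in col if level != '')
              for col in df.columns]
assert list(df.columns) == ['Extractor1_0', 'onset']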
Example #28
def test_tfhub_text_transformer_sentence():
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    cstim = ComplexTextStim(join(TEXT_DIR, 'wonderful.txt'))
    ext = TFHubTextExtractor(ELECTRA_URL,
                             features='sent_encoding',
                             preprocessor_url_or_path=TOKENIZER_URL)
    res = ext.transform(cstim.elements[:6])
    df = merge_results(res, extractor_names=False)
    pmod = hub.KerasLayer(TOKENIZER_URL)
    mmod = hub.KerasLayer(ELECTRA_URL)
    true = mmod(pmod([cstim.elements[5].text]))['pooled_output'][0, 20].numpy()
    assert np.isclose(df['sent_encoding'][5][20], true)
    with pytest.raises(ValueError) as err:
        TFHubTextExtractor(ELECTRA_URL,
                           preprocessor_url_or_path=TOKENIZER_URL,
                           output_key='key').transform(stim)
    assert 'Check which keys' in str(err.value)
Example #29
def test_vectorizer_extractor():
    pytest.importorskip('sklearn')
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    result = TextVectorizerExtractor().transform(stim).to_df()
    assert 'woman' in result.columns
    assert result['woman'][0] == 3

    from sklearn.feature_extraction.text import TfidfVectorizer
    custom_vectorizer = TfidfVectorizer()
    ext = TextVectorizerExtractor(vectorizer=custom_vectorizer)
    stim2 = TextStim(join(TEXT_DIR, 'simple_text.txt'))
    result = merge_results(ext.transform([stim, stim2]))
    assert ('TextVectorizerExtractor', 'woman') in result.columns
    assert np.allclose(0.129568189476,
                       result[('TextVectorizerExtractor', 'woman')][0])

    ext = TextVectorizerExtractor(vectorizer='CountVectorizer',
                                  analyzer='char_wb',
                                  ngram_range=(2, 2))
    result = ext.transform(stim).to_df()
    assert 'wo' in result.columns
    assert result['wo'][0] == 6
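Note: with analyzer='char_wb' and ngram_range=(2, 2), the vectorizer counts character bigrams inside word boundaries, so the 'wo' column above counts the bigram across words like 'woman'. A standalone sklearn sketch with made-up text:

from sklearn.feature_extraction.text import CountVectorizer

vec = CountVectorizer(analyzer='char_wb', ngram_range=(2, 2))
X = vec.fit_transform(['a woman and a wonder and a worry'])

# 'wo' occurs once each in 'woman', 'wonder', and 'worry'
print(X[0, vec.vocabulary_['wo']])  # -> 3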
Example #30
def test_indico_api_text_extractor():

    ext = IndicoAPITextExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                 models=['emotion', 'personality'])
    assert ext.validate_keys()

    # With ComplexTextStim input
    srtfile = join(get_test_data_path(), 'text', 'wonderful.srt')
    srt_stim = ComplexTextStim(srtfile, onset=4.2)
    result = merge_results(ext.transform(srt_stim), extractor_names=False)
    outdfKeysCheck = {
        'onset',
        'duration',
        'order',
        'object_id',
        'emotion_anger',
        'emotion_fear',
        'emotion_joy',
        'emotion_sadness',
        'emotion_surprise',
        'personality_openness',
        'personality_extraversion',
        'personality_agreeableness',
        'personality_conscientiousness'}
    meta_columns = {'source_file',
                    'history',
                    'class',
                    'filename'}
    assert set(result.columns) - set(['stim_name']) == outdfKeysCheck | meta_columns
    assert result['onset'][1] == 92.622

    # With TextStim input
    ts = TextStim(text="It's a wonderful life.")
    result = ext.transform(ts).to_df(object_id=True)
    assert set(result.columns) == outdfKeysCheck
    assert len(result) == 1

    ext = IndicoAPITextExtractor(api_key='nogood', models=['language'])
    assert not ext.validate_keys()
Example #31
def test_vectorizer_extractor():
    pytest.importorskip('sklearn')
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    result = TextVectorizerExtractor().transform(stim).to_df()
    assert 'woman' in result.columns
    assert result['woman'][0] == 3

    from sklearn.feature_extraction.text import TfidfVectorizer
    custom_vectorizer = TfidfVectorizer()
    ext = TextVectorizerExtractor(vectorizer=custom_vectorizer)
    stim2 = TextStim(join(TEXT_DIR, 'simple_text.txt'))
    result = merge_results(ext.transform([stim, stim2]), format='wide',
                           extractor_names='multi')
    assert ('TextVectorizerExtractor', 'woman') in result.columns
    assert np.allclose(0.129568189476,
                       result[('TextVectorizerExtractor', 'woman')][0])

    ext = TextVectorizerExtractor(vectorizer='CountVectorizer',
                                  analyzer='char_wb',
                                  ngram_range=(2, 2))
    result = ext.transform(stim).to_df()
    assert 'wo' in result.columns
    assert result['wo'][0] == 6
Example #32
def test_merge_extractor_results():
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    not_features = ['object_id']
    for de in des:
        not_features.append(de._log_attributes)
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]

    df = merge_results(results, format='wide')
    assert df.shape == (200, 18)
    cols = [
        'onset', 'duration', 'order', 'class', 'filename', 'history',
        'stim_name', 'source_file'
    ]
    assert not set(cols) - set(df.columns)
    assert 'Extractor2#feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='drop')
    assert df.shape == (200, 12)
    assert not set(cols) - set(df.columns)
    assert 'feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (200, 18)
    _cols = [(c, np.nan) for c in cols]
    assert not set(_cols) - set(df.columns)
    assert ('Extractor2', 'feature_3') in df.columns

    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')

    df = merge_results(results, format='long', extractor_names='column')
    assert df.shape == (1800, 12)
    _cols = cols + ['feature', 'extractor', 'value']
    assert not set(_cols) - set(df.columns)
    row = df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'
    assert not set(not_features).intersection(set(df['feature']))

    df = merge_results(results, format='long', extractor_names='drop')
    assert df.shape == (1800, 11)
    assert set(_cols) - set(df.columns) == {'extractor'}
    assert not set(not_features).intersection(set(df['feature']))

    df = merge_results(results, format='long', extractor_names='prepend')
    assert df.shape == (1800, 11)
    row = df.iloc[523, :]
    assert row['feature'] == 'Extractor1#feature_2'
    assert not set(not_features).intersection(set(df['feature']))

    df = merge_results(results, format='wide', extractor_params=True)
    logattr = {}
    for de in des:
        # Store the log attributes expected for each extractor
        logattr[de.name] = de._log_attributes
        for feat in ['feature_1', 'feature_2', 'feature_3']:
            idx_str = f'{de.name}#{feat}#extractor_params'
            assert idx_str in df.columns
            df_log_attr = json.loads(df[idx_str][0])
            for l in logattr[de.name]:
                assert l in df_log_attr.keys()

    df = merge_results(results, format='long', extractor_params=True)
    for idx, row in df.iterrows():
        de_name = row['feature'].split('#')[0]
        logs = logattr[de_name]
        df_logs = row['extractor_params']
        for l in logs:
            assert l in json.loads(df_logs).keys()
Example #33
    def run(self, stim, merge=True):
        # Run all root Transformers on the input stim(s), flatten the nested
        # results, and optionally merge them into a single DataFrame
        results = list(chain(*[self.run_node(n, stim) for n in self.roots]))
        results = list(flatten(results))
        self._results = results  # For use in plotting
        return merge_results(results) if merge else results