# Example 1
def test_compound_stim():
    """A CompoundStim exposes typed accessors, iteration, and get_stim().

    Builds a compound of audio, two images, a video, and complex text,
    then verifies element counts, per-type attribute access, and lookup
    by class or type name.
    """
    audio_dir = join(get_test_data_path(), 'audio')
    audio = AudioStim(join(audio_dir, 'crowd.mp3'))
    image1 = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    image2 = ImageStim(join(get_test_data_path(), 'image', 'obama.jpg'))
    filename = join(get_test_data_path(), 'video', 'small.mp4')
    video = VideoStim(filename)
    text = ComplexTextStim(text="The quick brown fox jumped...")
    stim = CompoundStim([audio, image1, image2, video, text])
    assert len(stim.elements) == 5
    assert isinstance(stim.video, VideoStim)
    assert isinstance(stim.complex_text, ComplexTextStim)
    assert isinstance(stim.image, ImageStim)
    with pytest.raises(AttributeError):
        stim.nonexistent_type
    assert stim.video_frame is None

    # Test iteration. BUG FIX: this was a bare comparison with no effect;
    # it is now actually asserted.
    assert len([e for e in stim]) == 5

    # Lookup by class and by lowercase type name must agree.
    imgs = stim.get_stim(ImageStim, return_all=True)
    assert len(imgs) == 2
    assert all([isinstance(im, ImageStim) for im in imgs])
    also_imgs = stim.get_stim('image', return_all=True)
    assert imgs == also_imgs
# Example 2
def test_transcribed_audio_stim():
    """Pairing audio with its transcript exposes both components by type."""
    data_path = get_test_data_path()
    audio_clip = AudioStim(join(data_path, 'audio', 'barber_edited.wav'))
    transcript = ComplexTextStim(join(data_path, 'text', 'wonderful_edited.srt'))
    compound = TranscribedAudioCompoundStim(audio=audio_clip, text=transcript)
    assert isinstance(compound.audio, AudioStim)
    assert isinstance(compound.complex_text, ComplexTextStim)
# Example 3
def test_complex_stim_from_srt():
    """Sentences parsed from an SRT file match the reference text file."""
    text_dir = join(get_test_data_path(), 'text')
    # The tab-separated .txt file holds the expected sentence texts.
    expected = pd.read_csv(join(text_dir, 'wonderful.txt'),
                           sep='\t')["text"].tolist()
    stim = ComplexTextStim(join(text_dir, 'wonderful.srt'))
    observed = [elem.text for elem in stim.elements]
    assert observed == expected
# Example 4
def test_transformations_on_compound_stim():
    """An extractor applied to a CompoundStim handles each compatible element."""
    data_path = get_test_data_path()
    apple = ImageStim(join(data_path, 'image', 'apple.jpg'))
    obama = ImageStim(join(data_path, 'image', 'obama.jpg'))
    sentence = ComplexTextStim(text="The quick brown fox jumped...")
    compound = CompoundStim([apple, obama, sentence])

    results = BrightnessExtractor().transform(compound)
    # Two results: one per image element (the text is not brightness-compatible).
    assert len(results) == 2
    assert np.allclose(results[0]._data[0], 0.88784294)
# Example 5
def test_magic_loader():
    """load_stims infers the right Stim subclass for each input file."""
    data_path = get_test_data_path()
    paths = [
        join(data_path, 'text', 'sample_text.txt'),
        join(data_path, 'audio', 'barber.wav'),
        join(data_path, 'video', 'small.mp4'),
    ]
    loaded = load_stims(paths)
    assert len(loaded) == 3
    # Spot-check one type-specific attribute per loaded stim.
    assert round(loaded[1].duration) == 57
    assert isinstance(loaded[0].text, str)
    assert loaded[2].width == 560
# Example 6
def test_save():
    """Each Stim type can save itself to disk via its default extension.

    FIX: replaces deprecated, race-prone tempfile.mktemp() with a
    TemporaryDirectory, which also guarantees cleanup even when save()
    or an assertion fails.
    """
    cts_file = join(get_test_data_path(), 'text', 'complex_stim_no_header.txt')
    complextext_stim = ComplexTextStim(cts_file, columns='ot',
                                       default_duration=0.2)
    text_stim = TextStim(text='hello')
    audio_stim = AudioStim(join(get_test_data_path(), 'audio', 'crowd.mp3'))
    image_stim = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))

    # Video gives travis problems
    stims = [complextext_stim, text_stim, audio_stim, image_stim]
    for s in stims:
        with tempfile.TemporaryDirectory() as tmpdir:
            path = join(tmpdir, 'stim' + s._default_file_extension)
            s.save(path)
            assert exists(path)
# Example 7
def test_video_frame_stim():
    """A VideoFrameStim derives onset, data, and name from its parent video."""
    video_path = join(get_test_data_path(), 'video', 'small.mp4')
    parent = VideoStim(video_path, onset=4.2)
    frame = VideoFrameStim(parent, 42)
    # onset = parent onset (4.2) + 42 frames / 30 fps = 5.6
    assert frame.onset == (5.6)
    assert np.array_equal(frame.data, parent.get_frame(index=42).data)
    assert frame.name == 'frame[42]'
# Example 8
def test_implicit_stim_conversion():
    """An ImageStim is implicitly converted to text for a text extractor."""
    button_img = ImageStim(join(get_test_data_path(), 'image', 'button.jpg'),
                           onset=4.2)
    df = LengthExtractor().transform(button_img).to_df()
    assert 'text_length' in df.columns
    # The recognized text is 4 characters long; onset carries through.
    assert df['text_length'][0] == 4
    assert df['onset'][0] == 4.2
# Example 9
def test_implicit_stim_iteration():
    """transform() over a list of stims yields one result per input."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    images = [ImageStim(join(image_dir, name))
              for name in ('apple.jpg', 'obama.jpg')]
    outputs = DummyExtractor().transform(images)
    assert len(outputs) == 2
    assert isinstance(outputs[0], ExtractorResult)
# Example 10
def test_audio_stim():
    """AudioStim reports duration and sampling rate for WAV files."""
    audio_dir = join(get_test_data_path(), 'audio')
    # (filename, expected rounded duration in seconds)
    for name, secs in [('barber.wav', 57), ('homer.wav', 3)]:
        clip = AudioStim(join(audio_dir, name))
        assert round(clip.duration) == secs
        assert clip.sampling_rate == 11025
# Example 11
def test_magic_loader2():
    """load_stims handles a mix of local paths and remote URLs."""
    sources = [
        join(get_test_data_path(), 'text', 'sample_text.txt'),
        'https://archive.org/download/DisneyCastletest/Disney_Castle_512kb.mp4',
        'https://archive.org/download/999WavFiles/TANKEN.WAV',
        'https://archive.org/download/NIX-C-1987-11903/1987_11903L.jpg',
        'https://github.com/psychoinformaticslab/pliers/blob/master/README.rst',
    ]
    loaded = load_stims(sources)
    assert len(loaded) == 5
    # Spot-check the remote video and image stims.
    assert loaded[1].fps == 30.0
    assert loaded[3].data.shape == (288, 360, 3)
# Example 12
def test_video_stim_bytestring():
    """get_bytestring() lazily computes and caches a base64 video encoding."""
    video_path = join(get_test_data_path(), 'video', 'small.mp4')
    video = VideoStim(video_path)
    assert video._bytestring is None  # not computed until requested
    encoded = video.get_bytestring()
    assert isinstance(encoded, str)
    assert video._bytestring is not None  # cached after first call
    # The encoding must match base64 of the raw file bytes.
    with open(video_path, 'rb') as fh:
        assert encoded.encode() == base64.b64encode(fh.read())
# Example 13
def test_implicit_stim_conversion3():
    """A VideoStim is implicitly converted to text for a text extractor."""
    speech_path = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    speech = VideoStim(speech_path, onset=4.2)
    results = LengthExtractor().transform(speech)
    first_word = results[0].to_df()
    # The word should be "today"
    assert 'text_length' in first_word.columns
    assert first_word['text_length'][0] == 5
    assert first_word['onset'][0] >= 4.2
# Example 14
def test_image_stim_bytestring():
    """get_bytestring() lazily computes and caches a base64 image encoding."""
    img_path = join(get_test_data_path(), 'image', 'apple.jpg')
    image = ImageStim(img_path)
    assert image._bytestring is None  # not computed until requested
    encoded = image.get_bytestring()
    assert isinstance(encoded, str)
    assert image._bytestring is not None  # cached after first call
    # The encoding must match base64 of the raw file bytes.
    with open(img_path, 'rb') as fh:
        assert encoded.encode() == base64.b64encode(fh.read())
# Example 15
def test_video_stim():
    """Test VideoStim functionality: metadata, frame iteration, and frame
    retrieval by index or onset.

    FIXES: two shape checks were bare comparisons with no `assert`; the
    f2_copy section re-asserted properties of f2 instead of the freshly
    fetched f2_copy.
    """
    filename = join(get_test_data_path(), 'video', 'small.mp4')
    video = VideoStim(filename, onset=4.2)
    assert video.fps == 30
    assert video.n_frames == 168
    assert video.width == 560
    assert video.duration == 5.57

    # Test frame iterator
    frames = [f for f in video]
    assert len(frames) == 168
    f1 = frames[100]
    assert isinstance(f1, VideoFrameStim)
    assert isinstance(f1.onset, float)
    assert np.isclose(f1.duration, 1 / 30.0, 1e-5)
    assert f1.data.shape == (320, 560, 3)

    # Test getting of specific frame
    f2 = video.get_frame(index=100)
    assert isinstance(f2, VideoFrameStim)
    assert isinstance(f2.onset, float)
    assert f2.onset > 7.5
    assert f2.data.shape == (320, 560, 3)
    # Retrieving by onset (3.33334 s * 30 fps = frame 100) must yield the
    # same frame as retrieving by index.
    f2_copy = video.get_frame(onset=3.33334)
    assert isinstance(f2_copy, VideoFrameStim)
    assert isinstance(f2_copy.onset, float)
    assert f2_copy.onset > 7.5
    assert np.array_equal(f2.data, f2_copy.data)

    # Try another video
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    assert video.fps == 12
    assert video.n_frames == 105
    assert video.width == 320
    assert video.duration == 8.71
    f3 = video.get_frame(index=104)
    assert isinstance(f3, VideoFrameStim)
    assert isinstance(f3.onset, float)
    assert f3.duration > 0.0
    assert f3.data.shape == (240, 320, 3)
# Example 16
def test_complex_stim_from_text():
    """ComplexTextStim tokenizes raw text by word, sentence, or custom regex."""
    textfile = join(get_test_data_path(), 'text', 'scandal.txt')
    with open(textfile) as fh:
        raw = fh.read().strip()

    # Default unit: word-level tokenization.
    word_stim = ComplexTextStim(text=raw)
    assert [w.text for w in word_stim.elements[:3]] == ['To', 'Sherlock',
                                                        'Holmes']
    assert len(word_stim.elements) == 231

    # Sentence-level tokenization should construct without error.
    ComplexTextStim(text=raw, unit='sent')

    # Custom tokenizer
    regex_stim = ComplexTextStim(text=raw, tokenizer=r'(\w+)')
    assert len(regex_stim.elements) == 209
# Example 17
def test_clashing_key_rename():
    """A feature clashing with a reserved column gets a trailing underscore."""
    apple = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    df = ClashingFeatureExtractor().transform(apple).to_df()
    # Both the reserved 'order' column and the renamed 'order_' feature
    # must be present alongside the regular features.
    for col in ('order', 'duration', 'onset', 'object_id',
                'feature_1', 'feature_2', 'order_'):
        assert col in df.columns
# Example 18
def test_implicit_stim_conversion2():
    """Audio is implicitly transcribed to text via the configured converter.

    FIX: the global 'default_converters' option is now restored in a
    `finally` block, so a failing assertion or transform error no longer
    leaks the override into other tests.
    """
    def_conv = config.get_option('default_converters')
    config.set_option('default_converters',
                      {'AudioStim->TextStim': ('WitTranscriptionConverter', )})
    try:
        audio_dir = join(get_test_data_path(), 'audio')
        stim = AudioStim(join(audio_dir, 'homer.wav'), onset=4.2)
        ext = LengthExtractor()
        result = ext.transform(stim)
        first_word = result[0].to_df()
        assert 'text_length' in first_word.columns
        assert first_word['text_length'][0] > 0
        assert first_word['onset'][0] >= 4.2
    finally:
        config.set_option('default_converters', def_conv)
# Example 19
def test_loader_nonexistent():
    """load_stims raises IOError for missing files unless fail_silently=True."""
    missing_text = 'this/doesnt/exist.txt'
    with pytest.raises(IOError):
        load_stims(missing_text)

    missing_audio = 'no/audio/here.wav'
    with pytest.raises(IOError):
        load_stims([missing_text, missing_audio])

    # With fail_silently, valid stims load and missing ones are skipped.
    real_text = join(get_test_data_path(), 'text', 'sample_text.txt')
    loaded = load_stims([real_text, missing_audio], fail_silently=True)
    assert len(loaded) == 1

    # A lone missing file still raises even with fail_silently.
    with pytest.raises(IOError):
        load_stims(missing_audio, fail_silently=True)
# Example 20
def test_complex_text_stim():
    """ComplexTextStim parses columnar text files with onsets and durations."""
    text_dir = join(get_test_data_path(), 'text')
    no_header = join(text_dir, 'complex_stim_no_header.txt')

    stim = ComplexTextStim(no_header, columns='ot', default_duration=0.2)
    assert len(stim.elements) == 4
    assert stim.elements[2].onset == 34
    assert stim.elements[2].duration == 0.2

    # A stim-level onset shifts every element's onset.
    shifted = ComplexTextStim(no_header, columns='ot', default_duration=0.2,
                              onset=4.2)
    assert shifted.elements[2].onset == 38.2
    assert shifted.elements[1].onset == 24.2

    with_header = ComplexTextStim(join(text_dir,
                                       'complex_stim_with_header.txt'))
    assert len(with_header.elements) == 4
    assert with_header.elements[2].duration == 0.1

    # Round-trip between (hours, minutes, seconds, ms) tuples and seconds:
    # 1 h + 42 min + 3 s = 6123 s.
    assert with_header._to_sec((1.0, 42, 3, 0)) == 6123
    assert with_header._to_tup(6123) == (1.0, 42, 3, 0)
# Example 21
def test_series():
    """SeriesStim wraps dict/file/array data as a 1-d pandas Series."""
    stim = SeriesStim({'a': 4, 'b': 2, 'c': 8}, onset=4, duration=2)
    expected = pd.Series([4, 2, 8], index=['a', 'b', 'c'])
    pd.testing.assert_series_equal(stim.data, expected)
    assert stim.onset == 4
    assert stim.duration == 2
    assert stim.order is None

    dict_path = Path(get_test_data_path(), 'text',
                     'test_lexical_dictionary.txt')
    # multiple columns found and no column arg provided
    with pytest.raises(ValueError):
        SeriesStim(filename=dict_path, sep='\t')

    freq_stim = SeriesStim(filename=dict_path, column='frequency', sep='\t')
    assert freq_stim.data.shape == (7,)
    assert freq_stim.data[3] == 15.417

    # 2-d array should fail
    with pytest.raises(Exception):
        SeriesStim(np.random.normal(size=(10, 2)))
# Example 22
def test_metric_extractor():
    """MetricExtractor applies reducer functions -- given as dotted-path
    strings, plain callables, or lambda source strings -- to a SeriesStim
    and reports one column per function (or one row per function in long
    format)."""
    # Toy reducers used to verify plain callables, including one that
    # returns multiple values.
    def dummy(array):
        return array[0]

    def dummy_list(array):
        return array[0], array[1]

    f = Path(get_test_data_path(), 'text', 'test_lexical_dictionary.txt')
    stim = SeriesStim(data=np.linspace(1., 4., 20), onset=2., duration=.5)
    stim_file = SeriesStim(filename=f,
                           column='frequency',
                           sep='\t',
                           index_col='text')

    ext_single = MetricExtractor(functions='numpy.mean')
    # subset_idx restricts computation to the listed index labels.
    ext_idx = MetricExtractor(functions='numpy.mean',
                              subset_idx=['for', 'testing', 'text'])
    ext_multiple = MetricExtractor(functions=[
        'numpy.mean', 'numpy.min', scipy.stats.entropy, dummy, dummy_list
    ])
    # var_names overrides the default function-derived column names.
    ext_names = MetricExtractor(
        functions=[
            'numpy.mean', 'numpy.min', scipy.stats.entropy, dummy, dummy_list,
            'tensorflow.reduce_mean'
        ],
        var_names=['mean', 'min', 'entropy', 'custom1', 'custom2', 'tf_mean'])
    # Functions can also be passed as lambda source strings.
    ext_lambda = MetricExtractor(functions='lambda x: -np.max(x)',
                                 var_names='custom_function')

    r = ext_single.transform(stim)
    r_file = ext_single.transform(stim_file)
    r_file_idx = ext_idx.transform(stim_file)
    r_multiple = ext_multiple.transform(stim)
    r_names = ext_names.transform(stim)
    r_lambda = ext_lambda.transform(stim)

    r_df = r.to_df()
    r_file_df = r_file.to_df()
    r_file_idx_df = r_file_idx.to_df()
    r_multiple_df = r_multiple.to_df()
    r_long = r_multiple.to_df(format='long')
    r_names_df = r_names.to_df()
    r_lambda_df = r_lambda.to_df()

    # Wide format: a single row per stim regardless of function count;
    # long format: one row per function.
    for res in [r_df, r_file_df, r_multiple_df]:
        assert res.shape[0] == 1
    assert r_long.shape[0] == len(ext_multiple.functions)
    assert r_df['onset'][0] == 2
    assert r_df['duration'][0] == .5
    assert r_df['mean'][0] == 2.5
    assert np.isclose(r_file_df['mean'][0], 11.388, rtol=0.001)
    # With subset_idx the mean is computed over fewer entries, so differs.
    assert np.isclose(r_file_idx_df['mean'][0], 12.582, rtol=0.001)
    assert all([m in r_multiple_df.columns for m in ['mean', 'entropy']])
    # Column for numpy.min comes out as 'amin'.
    assert r_multiple_df['amin'][0] == 1.
    assert r_multiple_df['dummy'][0] == 1.
    # A multi-valued return is stored as a single ndarray cell.
    assert r_multiple_df['dummy_list'][0][0] == np.linspace(1., 4., 20)[0]
    assert r_multiple_df['dummy_list'][0][1] == np.linspace(1., 4., 20)[1]
    assert type(r_multiple_df['dummy_list'][0]) == np.ndarray
    assert r_names_df.columns[-3] == 'custom1'
    assert r_names_df.columns[-2] == 'custom2'
    assert r_names_df.columns[-1] == 'tf_mean'
    # numpy and tensorflow means must agree on the same data.
    assert np.isclose(r_names_df['mean'][0], r_names_df['tf_mean'][0])
    assert r_lambda_df['custom_function'][0] == -4
# Example 23
def test_audio_formats():
    """MP3 audio loads with the expected duration and sampling rate."""
    mp3_path = join(get_test_data_path(), 'audio', 'crowd.mp3')
    clip = AudioStim(mp3_path)
    assert round(clip.duration) == 28
    assert clip.sampling_rate == 44100
# Example 24
def test_merge_extractor_results():
    """merge_results combines ExtractorResults from multiple extractors and
    stims into wide or long DataFrames, under the different
    extractor_names modes and with optional extractor_params serialization."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    # Columns that must never appear as features in long-format output.
    not_features = ['object_id']
    for de in des:
        not_features.append(de._log_attributes)
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]

    # Wide, default naming: feature columns prefixed 'ExtractorName#'.
    df = merge_results(results, format='wide')
    assert df.shape == (200, 18)
    cols = [
        'onset', 'duration', 'order', 'class', 'filename', 'history',
        'stim_name', 'source_file'
    ]
    assert not set(cols) - set(df.columns)
    assert 'Extractor2#feature_3' in df.columns

    # 'drop': extractor names removed, so same-named features collapse.
    df = merge_results(results, format='wide', extractor_names='drop')
    assert df.shape == (200, 12)
    assert not set(cols) - set(df.columns)
    assert 'feature_3' in df.columns

    # 'multi': (extractor, feature) tuples as column labels.
    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (200, 18)
    _cols = [(c, np.nan) for c in cols]
    assert not set(_cols) - set(df.columns)
    assert ('Extractor2', 'feature_3') in df.columns

    # 'multi' is rejected for long format.
    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')

    # Long, 'column': extractor name goes into its own column.
    df = merge_results(results, format='long', extractor_names='column')
    assert df.shape == (1800, 12)
    _cols = cols + ['feature', 'extractor', 'value']
    assert not set(_cols) - set(df.columns)
    row = df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'
    assert not set(not_features).intersection(set(df['feature']))

    # Long, 'drop': no extractor column at all.
    df = merge_results(results, format='long', extractor_names='drop')
    assert df.shape == (1800, 11)
    assert set(_cols) - set(df.columns) == {'extractor'}
    assert not set(not_features).intersection(set(df['feature']))

    # Long, 'prepend': extractor name folded into the feature label.
    df = merge_results(results, format='long', extractor_names='prepend')
    assert df.shape == (1800, 11)
    row = df.iloc[523, :]
    assert row['feature'] == 'Extractor1#feature_2'
    assert not set(not_features).intersection(set(df['feature']))

    # extractor_params=True (wide): each extractor's logged attributes are
    # JSON-encoded in per-feature '...#extractor_params' columns.
    df = merge_results(results, format='wide', extractor_params=True)
    logattr = {}
    for de in des:
        logattr[
            de.
            name] = de._log_attributes  # log attributes expected per extractor
        for feat in ['feature_1', 'feature_2', 'feature_3']:
            idx_str = f'{de.name}#{feat}#extractor_params'
            assert idx_str in df.columns
            df_log_attr = json.loads(df[idx_str][0])
            for l in logattr[de.name]:
                assert l in df_log_attr.keys()

    # extractor_params=True (long): a single JSON 'extractor_params' column.
    df = merge_results(results, format='long', extractor_params=True)
    for idx, row in df.iterrows():
        de_name = row['feature'].split('#')[0]
        logs = logattr[de_name]
        df_logs = row['extractor_params']
        for l in logs:
            assert l in json.loads(df_logs).keys()
# Example 25
import numpy as np
import pytest

from pliers.tests.utils import (get_test_data_path, DummyExtractor,
                                ClashingFeatureExtractor)
from pliers.extractors import (LengthExtractor, BrightnessExtractor,
                               SharpnessExtractor, VibranceExtractor)
from pliers.stimuli import (ComplexTextStim, ImageStim, VideoStim, AudioStim)
from pliers.support.download import download_nltk_data
from pliers.extractors.base import ExtractorResult, merge_results
from pliers import config

# Enable transformer caching for this module's tests, remembering the
# prior setting (presumably restored elsewhere -- TODO confirm).
cache_default = config.get_option('cache_transformers')
config.set_option('cache_transformers', True)

TEXT_DIR = join(get_test_data_path(), 'text')


@pytest.fixture(scope='module')
def get_nltk():
    """Module-scoped fixture that downloads required NLTK data once."""
    download_nltk_data()


def test_check_target_type():
    """Transforming an incompatible stim type raises TypeError."""
    text_stim = ComplexTextStim(join(TEXT_DIR, 'sample_text.txt'),
                                columns='to',
                                default_duration=1)
    extractor = SharpnessExtractor()
    # SharpnessExtractor does not accept text input.
    with pytest.raises(TypeError):
        extractor.transform(text_stim)
# Example 26
from pliers import config
from pliers.extractors import (TensorFlowKerasApplicationExtractor,
                               TFHubExtractor, TFHubImageExtractor,
                               TFHubTextExtractor, BertExtractor,
                               BertSequenceEncodingExtractor, BertLMExtractor,
                               BertSentimentExtractor, AudiosetLabelExtractor)
from pliers.filters import AudioResamplingFilter
from pliers.stimuli import (ImageStim, TextStim, ComplexTextStim, AudioStim)
from pliers.extractors.base import merge_results
from transformers import BertTokenizer
from pliers.utils import verify_dependencies

# Disable transformer caching for this module's tests, remembering the
# prior setting (presumably restored elsewhere -- TODO confirm).
cache_default = config.get_option('cache_transformers')
config.set_option('cache_transformers', False)

IMAGE_DIR = join(get_test_data_path(), 'image')
TEXT_DIR = join(get_test_data_path(), 'text')
AUDIO_DIR = join(get_test_data_path(), 'audio')

# TF Hub model handles used by the extractor tests below.
EFFNET_URL = 'https://tfhub.dev/tensorflow/efficientnet/b7/classification/1'
MNET_URL = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4'
SENTENC_URL = 'https://tfhub.dev/google/universal-sentence-encoder/4'
GNEWS_URL = 'https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2'
TOKENIZER_URL = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2'
ELECTRA_URL = 'https://tfhub.dev/google/electra_small/2'
SPEECH_URL = 'https://tfhub.dev/google/speech_embedding/1'

# Skip the whole module when the 'skip_high_memory' env var equals 'true'.
pytestmark = pytest.mark.skipif(environ.get('skip_high_memory',
                                            False) == 'true',
                                reason='high memory')
# Example 27
def test_image_stim(dummy_iter_extractor):
    """An image loaded from disk exposes its pixel array as (H, W, C)."""
    apple_path = join(get_test_data_path(), 'image', 'apple.jpg')
    image = ImageStim(apple_path)
    assert image.data.shape == (288, 420, 3)