def _transform(self, stim, *args, **kwargs):
    ''' Applies rate limiting, job-size checks and optional key validation
    before delegating to the parent class' _transform.

    Args:
        stim: The stim (or iterable of stims) to transform.
        args, kwargs: Passed through unchanged to the parent _transform.

    Raises:
        ValueError: If large jobs are disallowed and either the stim's
            duration exceeds the 'long_job' option or the running count of
            transformed stims exceeds the 'large_job' option; also if key
            validation is enabled and validate_keys() fails.
    '''
    # Throttle: sleep off the remainder of the rate-limit window if the
    # previous request was issued too recently.
    elapsed = time.time() - self._last_request_time
    if elapsed < self.rate_limit:
        time.sleep(self.rate_limit - elapsed)
    self._last_request_time = time.time()

    # The running count is updated before the size checks, so a rejected
    # request still counts towards the total.
    self.transformed_stim_count += len(listify(stim))

    if not config.get_option('allow_large_jobs'):
        is_single = not isiterable(stim)
        if is_single and stim.duration \
                and stim.duration > config.get_option('long_job'):
            raise ValueError("Attempted to run an API transformation "
                             "on a stimulus of duration %f, aborting. "
                             "To allow this transformation, set "
                             "config option 'allow_large_jobs' to "
                             "True." % stim.duration)

        max_count = config.get_option('large_job')
        if self.transformed_stim_count > max_count:
            raise ValueError("Number of transformations using this %s "
                             "would exceed %d, aborting further "
                             "transformations. To allow, set config "
                             "option 'allow_large_jobs' to True." %
                             (self.__class__.__name__,
                              config.get_option('large_job')))

    must_validate = config.get_option('api_key_validation')
    if must_validate and not self.validate_keys():
        raise ValueError("Error running %s, a provided environment key "
                         "was invalid or unauthorized. Please check that "
                         "you have authorized credentials for accessing "
                         "the target API." % self.__class__.__name__)

    parent = super(APITransformer, self)
    return parent._transform(stim, *args, **kwargs)
def _iterate(self, stims, *args, **kwargs):
    ''' Applies transform() to every stim, in parallel when configured.

    Args:
        stims: Iterable of stims to transform.
        args, kwargs: Passed through to each transform() call.

    Returns:
        The pool's map() result when the 'parallelize' option is set and
        multiprocessing is available; otherwise a lazy generator that skips
        falsy results.
    '''
    run_parallel = config.get_option('parallelize') \
        and multiprocessing is not None

    if not run_parallel:
        transformed = (self.transform(s, *args, **kwargs) for s in stims)
        return (res for res in transformed if res)

    def _transform(s):
        return self.transform(s, *args, **kwargs)

    pool = multiprocessing.ProcessingPool(config.get_option('n_jobs'))
    return pool.map(_transform, stims)
def test_microsoft_vision_api_extractor_large():
    ''' Transforming a video must raise ValueError when large jobs are
    disallowed and the 'large_job' limit is 3. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 3)

        ext = MicrosoftVisionAPITagExtractor()
        video = VideoStim(join(VIDEO_DIR, 'small.mp4'))
        with pytest.raises(ValueError):
            merge_results(ext.transform(video))
    finally:
        # Restore global config even when the test fails, so that later
        # tests do not inherit the modified options.
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def _log_transformation(source, result, trans=None, implicit=False):
    ''' Records how a result was produced by attaching a TransformationLog
    to it as its ``history`` attribute.

    Args:
        source: The object the result was derived from. Its name, filename,
            class name and history are recorded.
        result: The transformation output. None is returned untouched and
            iterables are processed element-wise.
        trans: Optional transformer that produced the result. Nothing is
            logged when its ``_loggable`` attribute is falsy.
        implicit (bool): Stored as the last field of the log entry.

    Returns:
        The result with its history set; a generator of logged results when
        result is iterable; or the unmodified result when logging is
        disabled or does not apply.
    '''
    if result is None or not config.get_option('log_transformations') or \
            (trans is not None and not trans._loggable):
        return result

    if isiterable(result):
        # Forward `implicit` so each element carries the same flag as the
        # call that produced the iterable.
        return (_log_transformation(source, r, trans, implicit)
                for r in result)

    values = [source.name, source.filename, source.__class__.__name__]
    if isinstance(result, Stim):
        values.extend([result.name, result.filename])
    else:
        values.extend(['', ''])
    values.append(result.__class__.__name__)
    if trans is not None:
        values.append(trans.__class__.__name__)
        tr_attrs = [getattr(trans, attr) for attr in trans._log_attributes]
        values.append(str(dict(zip(trans._log_attributes, tr_attrs))))
    else:
        # Two placeholders (transformer class and parameters) keep the
        # field count identical to the branch above. Appending one list
        # instead would leave the record a field short and put a list in
        # the transformer-class slot.
        values.extend(['', ''])
    parent = source.history
    string = str(parent) if parent else values[2]
    # values[6] is the transformer class name, values[5] the result class
    string += '->%s/%s' % (values[6], values[5])
    values.extend([string, parent])
    values.append(implicit)
    result.history = TransformationLog(*values)
    return result
def get_converter(in_type, out_type, *args, **kwargs):
    ''' Looks through the available Converters and instantiates the first
    one whose input and output types match the request.

    Args:
        in_type (type): The input type the converter must accept.
        out_type (type): The output type (or list of types) the converter
            must produce.
        args, kwargs: Optional positional and keyword arguments handed to
            the matching Converter's initializer.

    Returns:
        An instance of the first matching Converter, or None if no
        candidate matches.
    '''
    candidates = pliers.converters.__all__

    # Converters configured as defaults for a requested type pair are moved
    # to the front of the candidate list so they are tried first.
    out_type = listify(out_type)[::-1]
    preferred = config.get_option('default_converters')
    for target in out_type:
        pair = '%s->%s' % (in_type.__name__, target.__name__)
        if pair in preferred:
            candidates = list(preferred[pair]) + candidates

    for name in candidates:
        cls = getattr(pliers.converters, name)
        if not (inspect.isclass(cls) and issubclass(cls, Converter)):
            continue

        if issubclass(cls, EnvironmentKeyMixin):
            available = cls.available
        else:
            available = True

        types_match = cls._input_type == in_type \
            and cls._output_type in out_type
        if types_match and available:
            return cls(*args, **kwargs)

    return None
def test_caching():
    ''' Checks that results are reused when 'cache_transformers' is on and
    recomputed when it is off, for both Stim and file path inputs. '''
    cache_default = config.get_option('cache_transformers')
    apple_path = join(get_test_data_path(), 'image', 'apple.jpg')
    try:
        config.set_option('cache_transformers', True)

        img1 = ImageStim(apple_path)
        ext = DummyExtractor()
        res = ext.transform(img1)
        assert ext.num_calls == 1
        res2 = ext.transform(img1)
        assert ext.num_calls == 1
        assert res == res2

        # With caching off the extractor has to run again
        config.set_option('cache_transformers', False)
        res3 = ext.transform(img1)
        assert ext.num_calls == 2
        assert res != res3

        # Same behavior when the stim is given as a file path
        config.set_option('cache_transformers', True)
        ext.num_calls = 0
        res = ext.transform(apple_path)
        assert ext.num_calls == 1
        res2 = ext.transform(apple_path)
        assert ext.num_calls == 1
        assert res == res2
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('cache_transformers', cache_default)
def _log_transformation(source, result, trans=None, implicit=False):
    ''' Records how a result was produced by attaching a TransformationLog
    to it as its ``history`` attribute.

    Args:
        source: The object the result was derived from. It need not be a
            Stim; missing name, filename or history attributes are recorded
            as None.
        result: The transformation output. None is returned untouched and
            iterables are processed element-wise.
        trans: Optional transformer that produced the result. Nothing is
            logged when its ``_loggable`` attribute is falsy.
        implicit (bool): Stored as the last field of the log entry.

    Returns:
        The result with its history set; a generator of logged results when
        result is iterable; or the unmodified result when logging is
        disabled or does not apply.
    '''
    if result is None or not config.get_option('log_transformations') or \
            (trans is not None and not trans._loggable):
        return result

    if isiterable(result):
        # Forward `implicit` so each element carries the same flag as the
        # call that produced the iterable.
        return (_log_transformation(source, r, trans, implicit)
                for r in result)

    # Converters are no longer restricted to Stim inputs, so ensure name and
    # filename are set.
    name = getattr(source, 'name', None)
    filename = getattr(source, 'filename', None)
    values = [name, filename, source.__class__.__name__]
    if isinstance(result, Stim):
        values.extend([result.name, result.filename])
    else:
        values.extend(['', ''])
    values.append(result.__class__.__name__)
    if trans is not None:
        values.append(trans.__class__.__name__)
        tr_attrs = [getattr(trans, attr) for attr in trans._log_attributes]
        values.append(str(dict(zip(trans._log_attributes, tr_attrs))))
    else:
        # Two placeholders (transformer class and parameters) keep the
        # field count identical to the branch above. Appending one list
        # instead would leave the record a field short and put a list in
        # the transformer-class slot.
        values.extend(['', ''])
    # Non-Stim sources may not carry a history, for the same reason they
    # may lack a name or filename.
    parent = getattr(source, 'history', None)
    string = str(parent) if parent else values[2]
    # values[6] is the transformer class name, values[5] the result class
    string += '->{}/{}'.format(values[6], values[5])
    values.extend([string, parent])
    values.append(implicit)
    result.history = TransformationLog(*values)
    return result
def test_validation_levels(caplog):
    ''' Exercises the 'strict', 'warn' and 'loose' validation modes using
    an image extractor applied to a text stim. '''
    cache_default = config.get_option('cache_transformers')
    try:
        config.set_option('cache_transformers', False)

        ext = BrightnessExtractor()
        stim = TextStim(text='hello world')

        # Default (strict) validation raises
        with pytest.raises(TypeError):
            ext.transform(stim)

        # 'warn' logs the validation message and yields no result
        res = ext.transform(stim, validation='warn')
        log_message = caplog.records[0].message
        assert log_message == (
            "Transformers of type BrightnessExtractor can "
            "only be applied to stimuli of type(s) <class 'pliers"
            ".stimuli.image.ImageStim'> (not type TextStim), and no "
            "applicable Converter was found.")
        assert not res

        # 'loose' yields no result for the invalid stim
        res = ext.transform(stim, validation='loose')
        assert not res
        stim2 = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
        res = ext.transform([stim, stim2], validation='loose')
        assert len(res) == 1
        assert np.isclose(res[0].to_df()['brightness'][0], 0.88784294, 1e-5)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('cache_transformers', cache_default)
def _log_transformation(source, result, trans=None):
    ''' Records how a result was produced by attaching a TransformationLog
    to it as its ``history`` attribute.

    Args:
        source: The object the result was derived from. Its name, filename,
            class name and history are recorded.
        result: The transformation output. None is returned untouched and
            iterables are processed element-wise.
        trans: Optional transformer that produced the result. Nothing is
            logged when its ``_loggable`` attribute is falsy.

    Returns:
        The result with its history set; a generator of logged results when
        result is iterable; or the unmodified result when logging is
        disabled or does not apply.
    '''
    if result is None or not config.get_option('log_transformations') or \
            (trans is not None and not trans._loggable):
        return result

    if isiterable(result):
        return (_log_transformation(source, r, trans) for r in result)

    values = [source.name, source.filename, source.__class__.__name__]
    if isinstance(result, Stim):
        values.extend([result.name, result.filename])
    else:
        values.extend(['', ''])
    values.append(result.__class__.__name__)
    if trans is not None:
        values.append(trans.__class__.__name__)
        tr_attrs = [getattr(trans, attr) for attr in trans._log_attributes]
        values.append(str(dict(zip(trans._log_attributes, tr_attrs))))
    else:
        # Two placeholders (transformer class and parameters) keep the
        # field count identical to the branch above. Appending one list
        # instead would leave the record a field short and put a list in
        # the transformer-class slot.
        values.extend(['', ''])
    parent = source.history
    string = str(parent) if parent else values[2]
    # values[6] is the transformer class name, values[5] the result class
    string += '->%s/%s' % (values[6], values[5])
    values.extend([string, parent])
    result.history = TransformationLog(*values)
    return result
def test_validation_levels(caplog):
    ''' Exercises the 'strict', 'warn' and 'loose' validation modes using
    an image extractor applied to a text stim. '''
    cache_default = config.get_option('cache_transformers')
    try:
        config.set_option('cache_transformers', False)

        ext = BrightnessExtractor()
        stim = TextStim(text='hello world')

        # Default (strict) validation raises
        with pytest.raises(TypeError):
            ext.transform(stim)

        # 'warn' logs the validation message and yields no result
        res = ext.transform(stim, validation='warn')
        log_message = caplog.records[0].message
        assert log_message == (
            "Transformers of type BrightnessExtractor can "
            "only be applied to stimuli of type(s) <class 'pliers"
            ".stimuli.image.ImageStim'> (not type TextStim), and no "
            "applicable Converter was found.")
        assert not res

        # 'loose' yields no result for the invalid stim
        res = ext.transform(stim, validation='loose')
        assert not res
        stim2 = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
        res = ext.transform([stim, stim2], validation='loose')
        assert len(res) == 1
        assert np.isclose(res[0].to_df()['brightness'][0], 0.88784294, 1e-5)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('cache_transformers', cache_default)
def test_clarifai_api_extractor_large():
    ''' With a 'large_job' limit of 1, transforming two images must raise
    unless 'allow_large_jobs' is enabled. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)

        ext = ClarifaiAPIExtractor()
        images = [ImageStim(join(get_test_data_path(),
                                 'image', 'apple.jpg'))] * 2
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))

        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'ClarifaiAPIExtractor#apple' in results.columns
        assert results.shape == (1, 29)  # not 2 cause all the same instance
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_clarifai_api_extractor_large():
    ''' With a 'large_job' limit of 1, transforming two images must raise
    unless 'allow_large_jobs' is enabled. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)

        ext = ClarifaiAPIImageExtractor()
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
                  ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))

        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'ClarifaiAPIImageExtractor#apple' in results.columns
        assert results.shape == (2, 49)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_indico_api_extractor_large():
    ''' With a 'large_job' limit of 1, transforming two images must raise
    unless 'allow_large_jobs' is enabled. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)

        ext = IndicoAPIImageExtractor(models=['fer'])
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg'))] * 2
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))

        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'IndicoAPIImageExtractor#fer_Neutral' in results.columns
        # not 2 rows cause all the same instance
        assert results.shape == (1, 15)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_ibm_speech_converter_large():
    ''' Converting the audio stim must raise ValueError while
    'allow_large_jobs' is disabled. '''
    default = config.get_option('allow_large_jobs')
    try:
        config.set_option('allow_large_jobs', False)

        conv = IBMSpeechAPIConverter()
        audio = AudioStim(join(AUDIO_DIR, 'silence.wav'))
        with pytest.raises(ValueError):
            conv.transform(audio)
    finally:
        # Restore global config even if the check above fails
        config.set_option('allow_large_jobs', default)
def set_iterable_type(obj):
    ''' Converts an iterable into a generator or a list, depending on the
    'use_generators' config option. Non-iterables are returned unchanged.

    In list mode the elements are converted recursively, so the returned
    list contains no nested generators.
    '''
    if not isiterable(obj):
        return obj

    if not config.get_option('use_generators'):
        return [set_iterable_type(item) for item in obj]

    # Generator mode: reuse an existing generator, wrap anything else
    if isgenerator(obj):
        return obj
    return (item for item in obj)
def test_indico_api_extractor_large():
    ''' With a 'large_job' limit of 1, transforming two images must raise
    unless 'allow_large_jobs' is enabled. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)

        ext = IndicoAPIImageExtractor(models=['fer'])
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
                  ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))

        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'IndicoAPIImageExtractor#fer_Neutral' in results.columns
        assert results.shape == (2, 15)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def wrapper(self, stim, *args, **kwargs):
    ''' Calls the wrapped transform, serving and storing results through
    the module-level cache when caching applies.

    Caching applies only when the 'cache_transformers' option is set and
    stim is a Stim or a string. Generator results are converted to lists
    before they are stored.
    '''
    cacheable = config.get_option('cache_transformers') \
        and isinstance(stim, (Stim, str))
    if not cacheable:
        return transform(self, stim, *args, **kwargs)

    # The key combines the transformer's hash with the stim's hash
    key = hash((hash(self), hash(stim)))
    if key in _cache:
        return _cache[key]

    result = transform(self, stim, *args, **kwargs)
    if isgenerator(result):
        result = list(result)
    _cache[key] = result
    return result
def test_google_vision_api_extractor_large():
    ''' With a 'large_job' limit of 1 and caching off, transforming two
    images must raise unless 'allow_large_jobs' is enabled. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    default_cache = config.get_option('cache_transformers')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)
        config.set_option('cache_transformers', False)

        ext = GoogleVisionAPILabelExtractor()
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
                  ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))

        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'GoogleVisionAPILabelExtractor#Apple' in results.columns
        assert results.shape == (2, 32)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
        config.set_option('cache_transformers', default_cache)
def test_google_vision_api_extractor_large():
    ''' With a 'large_job' limit of 1 and caching off, transforming two
    images must raise unless 'allow_large_jobs' is enabled. '''
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    default_cache = config.get_option('cache_transformers')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)
        config.set_option('cache_transformers', False)

        ext = GoogleVisionAPILabelExtractor()
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
                  ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))

        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'GoogleVisionAPILabelExtractor#apple' in results.columns
        assert results.shape == (2, 32)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
        config.set_option('cache_transformers', default_cache)
def test_implicit_stim_conversion2():
    ''' Applies a text extractor to an audio stim, with the audio-to-text
    default converter set to WitTranscriptionConverter. '''
    def_conv = config.get_option('default_converters')
    try:
        config.set_option(
            'default_converters',
            {'AudioStim->TextStim': ('WitTranscriptionConverter', )})

        audio_dir = join(get_test_data_path(), 'audio')
        stim = AudioStim(join(audio_dir, 'homer.wav'), onset=4.2)
        ext = LengthExtractor()
        result = ext.transform(stim)

        first_word = result[0].to_df()
        assert 'text_length' in first_word.columns
        assert first_word['text_length'][0] > 0
        # Result onsets must not precede the onset of the audio stim
        assert first_word['onset'][0] >= 4.2
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('default_converters', def_conv)
def test_implicit_stim_conversion2():
    ''' Applies a text extractor to an audio stim, with the audio-to-text
    default converter set to WitTranscriptionConverter. '''
    def_conv = config.get_option('default_converters')
    try:
        config.set_option(
            'default_converters',
            {'AudioStim->TextStim': ('WitTranscriptionConverter',)})

        audio_dir = join(get_test_data_path(), 'audio')
        stim = AudioStim(join(audio_dir, 'homer.wav'), onset=4.2)
        ext = LengthExtractor()
        result = ext.transform(stim)

        first_word = result[0].to_df()
        assert 'text_length' in first_word.columns
        assert first_word['text_length'][0] > 0
        # Result onsets must not precede the onset of the audio stim
        assert first_word['onset'][0] >= 4.2
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('default_converters', def_conv)
def test_parallelization():
    ''' Checks that the same video transform gives equal results with the
    'parallelize' option on and off. '''
    # TODO: test that parallelization actually happened (this will likely
    # require some new logging functionality, or introspection). For now we
    # just make sure the parallelized version produces the same result.
    default = config.get_option('parallelize')
    cache_default = config.get_option('cache_transformers')
    try:
        config.set_option('cache_transformers', True)

        filename = join(get_test_data_path(), 'video', 'small.mp4')
        video = VideoStim(filename)
        ext = BrightnessExtractor()

        # With parallelization
        config.set_option('parallelize', True)
        result1 = ext.transform(video)

        # Without parallelization
        config.set_option('parallelize', False)
        result2 = ext.transform(video)

        assert result1 == result2
    finally:
        # Restore global config even if the comparison above fails
        config.set_option('parallelize', default)
        config.set_option('cache_transformers', cache_default)
def test_microsoft_api_face_emotion_extractor():
    ''' Checks the face emotion features extracted from a test image, and
    that an invalid subscription key is rejected when key validation is
    enabled. '''
    ext = MicrosoftAPIFaceEmotionExtractor()
    img = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    res = ext.transform(img).to_df(timing=False, object_id=False)
    assert res.shape == (1, 8)
    assert res['face_emotion_happiness'][0] > 0.5
    assert res['face_emotion_anger'][0] < 0.5

    ext = MicrosoftAPIFaceEmotionExtractor(subscription_key='nogood')
    assert not ext.validate_keys()

    default = config.get_option('api_key_validation')
    try:
        config.set_option('api_key_validation', True)
        with pytest.raises(ValueError):
            ext.transform(img)
    finally:
        # Restore global config even if the check above fails
        config.set_option('api_key_validation', default)
def test_batch_transformer():
    ''' Checks that a batch extractor gives the same merged results with
    its default batch size and with a batch size of 1, and counts the
    calls each one makes. '''
    cache_default = config.get_option('cache_transformers')
    try:
        config.set_option('cache_transformers', False)

        image_dir = join(get_test_data_path(), 'image')
        img1 = ImageStim(join(image_dir, 'apple.jpg'))
        img2 = ImageStim(join(image_dir, 'button.jpg'))
        img3 = ImageStim(join(image_dir, 'obama.jpg'))

        ext = DummyBatchExtractor()
        res = merge_results(ext.transform([img1, img2, img3]))
        assert ext.num_calls == 1
        assert res.shape == (3, 10)

        # One stim per batch means one call per stim
        ext = DummyBatchExtractor(batch_size=1)
        res2 = merge_results(ext.transform([img1, img2, img3]))
        assert ext.num_calls == 3
        assert res.equals(res2)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('cache_transformers', cache_default)
def test_progress_bar(capfd):
    ''' Checks that progress output is written to stderr only while the
    'progress_bar' option is enabled. '''
    video_dir = join(get_test_data_path(), 'video')
    video = VideoStim(join(video_dir, 'obama_speech.mp4'))
    conv = FrameSamplingFilter(hertz=2)

    old_val = config.get_option('progress_bar')
    try:
        config.set_option('progress_bar', True)
        derived = conv.transform(video)
        out, err = capfd.readouterr()
        assert 'Video frame:' in err and '100%' in err

        config.set_option('progress_bar', False)
        derived = conv.transform(video)
        out, err = capfd.readouterr()
        assert 'Video frame:' not in err and '100%' not in err
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('progress_bar', old_val)
def test_batch_transformer_caching():
    ''' Checks a batch extractor with caching on: the call count, and that
    repeated stims in one request give equal results. '''
    cache_default = config.get_option('cache_transformers')
    try:
        config.set_option('cache_transformers', True)

        image_dir = join(get_test_data_path(), 'image')
        img1 = ImageStim(join(image_dir, 'apple.jpg'))
        ext = DummyBatchExtractor(name='penguin')
        res = ext.transform(img1).to_df(timing=False, object_id=False)
        assert ext.num_calls == 1
        assert res.shape == (1, 1)

        img2 = ImageStim(join(image_dir, 'button.jpg'))
        img3 = ImageStim(join(image_dir, 'obama.jpg'))
        res2 = ext.transform([img1, img2, img2, img3, img3, img1, img2])
        assert ext.num_calls == 3
        assert len(res2) == 7
        # Repeated stims must map to equal results
        assert res2[0] == res2[5] and res2[1] == res2[2] \
            and res2[3] == res2[4]
        res2 = merge_results(res2)
        assert res2.shape == (3, 10)
    finally:
        # Restore global config even if an assertion above fails
        config.set_option('cache_transformers', cache_default)
def _iterate(self, stims, validation='strict', *args, **kwargs):
    ''' Transforms stims in batches of ``self._batch_size``, reusing cached
    results where caching is enabled.

    Args:
        stims: Iterable of stims to transform.
        validation: Accepted for signature compatibility; not used here.
        args, kwargs: Passed through to each _transform call.

    Returns:
        A list with one result per input stim, in input order.
    '''
    results = []
    batches = batch_iterable(stims, self._batch_size)
    for batch in progress_bar_wrapper(batches):
        caching = config.get_option('cache_transformers')

        # First pass: collect the stims that must actually be transformed,
        # remembering where each one's result will sit in the batch output.
        positions = {}
        pending = []
        for stim in batch:
            key = hash((hash(self), hash(stim)))
            # With caching on, skip stims that are already cached or that
            # appeared earlier in this batch.
            already_covered = caching and (key in _cache or key in positions)
            if not already_covered:
                positions[key] = len(pending)
                pending.append(stim)

        # _transform will likely fail if given an empty list
        if pending:
            batch_results = self._transform(pending, *args, **kwargs)
        else:
            batch_results = []

        # Second pass: assemble the results in the original stim order
        for stim in batch:
            key = hash((hash(self), hash(stim)))
            if key not in positions:
                # Not transformed in this batch, so it must be cached
                results.append(_cache[key])
                continue

            result = batch_results[positions[key]]
            result = _log_transformation(stim, result, self)
            self._propagate_context(stim, result)
            if caching:
                if isgenerator(result):
                    result = list(result)
                _cache[key] = result
            results.append(result)
    return results
from os.path import join import json import numpy as np import pytest from pliers.tests.utils import (get_test_data_path, DummyExtractor, ClashingFeatureExtractor) from pliers.extractors import (LengthExtractor, BrightnessExtractor, SharpnessExtractor, VibranceExtractor) from pliers.stimuli import (ComplexTextStim, ImageStim, VideoStim, AudioStim) from pliers.support.download import download_nltk_data from pliers.extractors.base import ExtractorResult, merge_results from pliers import config cache_default = config.get_option('cache_transformers') config.set_option('cache_transformers', True) TEXT_DIR = join(get_test_data_path(), 'text') @pytest.fixture(scope='module') def get_nltk(): download_nltk_data() def test_check_target_type(): stim = ComplexTextStim(join(TEXT_DIR, 'sample_text.txt'), columns='to', default_duration=1) td = SharpnessExtractor()
def progress_bar_wrapper(iterable, **kwargs):
    ''' Wraps the iterable in a tqdm progress bar when the 'progress_bar'
    config option is set and the iterable is not already a tqdm instance.
    Otherwise the iterable is returned unchanged. Keyword arguments are
    passed to tqdm. '''
    show_bar = config.get_option('progress_bar')
    if show_bar and not isinstance(iterable, tqdm):
        return tqdm(iterable, **kwargs)
    return iterable
def progress_bar_wrapper(iterable, **kwargs):
    ''' Wraps the iterable in a tqdm progress bar when the 'progress_bar'
    config option is set and the iterable is not already a tqdm instance.
    Otherwise the iterable is returned unchanged. Keyword arguments are
    passed to tqdm. '''
    show_bar = config.get_option('progress_bar')
    if show_bar and not isinstance(iterable, tqdm):
        return tqdm(iterable, **kwargs)
    return iterable
def transform(self, stims, validation='strict', *args, **kwargs):
    ''' Executes the transformation on the passed stim(s).

    Args:
        stims (str, Stim, list): One or more stimuli to process. Must be
            one of:

            - A string giving the path to a file that can be read in
              as a Stim (e.g., a .txt file, .jpg image, etc.)
            - A Stim instance of any type.
            - An iterable of stims, where each element is either a
              string or a Stim.

        validation (str): String specifying how validation errors should
            be handled. Must be one of:

            - 'strict': Raise an exception on any validation error
            - 'warn': Issue a warning for all validation errors
            - 'loose': Silently ignore all validation errors

            Any other value is treated like 'strict'.

        args: Optional positional arguments to pass onto the internal
            _transform call.
        kwargs: Optional keyword arguments to pass onto the internal
            _transform call.

    Returns:
        For iterable input, the results wrapped by set_iterable_type. For
        a single stim, the result of _transform (generators are converted
        to lists), or None if validation failed in 'warn' or 'loose' mode.

    Raises:
        ValueError: If a CompoundStim contains no stims of the expected
            input type.
        TypeError: If validation fails and the mode is neither 'warn' nor
            'loose'.
    '''
    if isinstance(stims, str):
        stims = load_stims(stims)

    # If stims is a CompoundStim and the Transformer is expecting a single
    # input type, extract all matching stims
    if isinstance(stims, CompoundStim) and not isinstance(
            self._input_type, tuple):
        stims = stims.get_stim(self._input_type, return_all=True)
        if not stims:
            raise ValueError("No stims of class %s found in the provided "
                             "CompoundStim instance." % self._input_type)

    # If stims is an iterable, naively loop over elements, removing
    # invalid results if needed
    if isiterable(stims):
        iters = self._iterate(stims, validation=validation, *args, **kwargs)
        if config.get_option('drop_bad_extractor_results'):
            iters = (i for i in iters if i is not None)
        iters = progress_bar_wrapper(iters, desc='Stim')
        return set_iterable_type(iters)

    # Validate stim, and then either pass it directly to the Transformer
    # or, if a conversion occurred, recurse.
    try:
        validated_stim = self._validate(stims)
    except TypeError as err:
        if validation == 'warn':
            logging.warning(str(err))
            return None
        if validation == 'loose':
            return None
        # 'strict' and any unrecognized mode: surface the validation error
        # rather than continuing with no validated stim.
        raise

    # If a conversion occurred during validation, we recurse. The
    # validation mode is passed explicitly so that positional args are not
    # mistaken for it.
    if stims is not validated_stim:
        return self.transform(validated_stim, validation, *args, **kwargs)

    result = self._transform(validated_stim, *args, **kwargs)
    result = _log_transformation(validated_stim, result, self)
    if isgenerator(result):
        result = list(result)
    self._propagate_context(validated_stim, result)
    return result