def test_loader_nonexistent():
    text_file = 'this/doesnt/exist.txt'
    with pytest.raises(IOError):
        stims = load_stims(text_file)

    audio_file = 'no/audio/here.wav'
    with pytest.raises(IOError):
        stims = load_stims([text_file, audio_file])

    # With fail_silently=True, missing files in a list are skipped
    text_file = join(get_test_data_path(), 'text', 'sample_text.txt')
    stims = load_stims([text_file, audio_file], fail_silently=True)
    assert len(stims) == 1

    # A single nonexistent path still raises, even with fail_silently=True
    with pytest.raises(IOError):
        stims = load_stims(audio_file, fail_silently=True)
def clean_transcript(input_transcript, input_media, onset=None, offset=None):
    stim = load_stims([input_media])[0]
    if not isinstance(stim, AudioStim):
        conv = VideoToAudioConverter()
        stim = conv.transform(stim)
        input_media = '/tmp/input_audio.wav'
        stim.save(input_media)

    _, extension = splitext(input_transcript)
    clean_transcript = '/tmp/clean_transcript.txt'
    with open(clean_transcript, 'w') as new_file:
        if extension == '.srt':  # splitext keeps the leading dot
            txt = ComplexTextStim(input_transcript)
            for el in txt.elements:
                _clean_save(el.text, new_file, el.onset, el.duration)
        else:  # Treat as a single block of text
            if onset is None or offset is None:
                raise Exception("Onset and offset must be declared")
            txt = TextStim(input_transcript)
            _clean_save(txt.text, new_file, onset, stim.duration - offset)

    return clean_transcript, input_media
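# A minimal usage sketch for clean_transcript above. The file names and the
# onset/offset values are hypothetical, for illustration only: an .srt
# transcript carries its own timestamps, while a plain-text transcript needs
# explicit onset/offset values from the caller.
def example_clean_transcript():
    # Subtitle transcript: timing is read from the .srt elements.
    transcript_path, audio_path = clean_transcript('talk.srt', 'talk.mp4')

    # Plain-text transcript: onset/offset are required, otherwise
    # clean_transcript raises.
    transcript_path, audio_path = clean_transcript(
        'talk.txt', 'talk.wav', onset=0.5, offset=1.0)
    return transcript_path, audio_path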
def hash_stim(stim, blocksize=65536):
    """ Hash a pliers stimulus """
    if isinstance(stim, Path):
        stim = stim.as_posix()

    if isinstance(stim, str):
        from pliers.stimuli import load_stims
        from os.path import isfile
        assert isfile(stim)
        stim = load_stims(stim)

    hasher = hashlib.sha1()

    # In-memory stimuli are hashed by their data payload...
    if hasattr(stim, "data"):
        return hash_data(stim.data)
    # ...otherwise the source file is hashed in blocksize chunks.
    else:
        filename = stim.history.source_file \
            if stim.history \
            else stim.filename

        with open(filename, 'rb') as afile:
            buf = afile.read(blocksize)
            while len(buf) > 0:
                hasher.update(buf)
                buf = afile.read(blocksize)

        return hasher.hexdigest()
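# A minimal usage sketch for hash_stim, assuming pliers is installed and the
# (hypothetical) file below exists. Hashing the path and hashing the loaded
# stim should yield the same digest, since both routes hash the same data.
def example_hash_stim():
    from pliers.stimuli import load_stims

    path = 'stimuli/barber.wav'  # hypothetical file path
    digest_from_path = hash_stim(path)  # accepts str or pathlib.Path
    digest_from_stim = hash_stim(load_stims(path))  # or a loaded stim
    assert digest_from_path == digest_from_stim
    return digest_from_path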
def test_magic_loader():
    text_file = join(get_test_data_path(), 'text', 'sample_text.txt')
    audio_file = join(get_test_data_path(), 'audio', 'barber.wav')
    video_file = join(get_test_data_path(), 'video', 'small.mp4')
    stim_files = [text_file, audio_file, video_file]
    stims = load_stims(stim_files)
    assert len(stims) == 3
    assert round(stims[1].duration) == 57
    assert isinstance(stims[0].text, string_types)
    assert stims[2].width == 560
def test_magic_loader2():
    text_file = join(get_test_data_path(), 'text', 'sample_text.txt')
    video_url = 'http://www.obamadownloads.com/videos/iran-deal-speech.mp4'
    audio_url = 'http://www.bobainsworth.com/wav/simpsons/themodyn.wav'
    image_url = 'https://www.whitehouse.gov/sites/whitehouse.gov/files/images/twitter_cards_potus.jpg'
    text_url = 'https://github.com/tyarkoni/pliers/blob/master/README.md'
    stims = load_stims([text_file, video_url, audio_url, image_url, text_url])
    assert len(stims) == 5
    assert stims[1].fps == 12
    assert stims[3].data.shape == (240, 240, 3)
def test_magic_loader2():
    text_file = join(get_test_data_path(), 'text', 'sample_text.txt')
    video_url = 'https://archive.org/download/DisneyCastletest/Disney_Castle_512kb.mp4'
    audio_url = 'https://archive.org/download/999WavFiles/TANKEN.WAV'
    image_url = 'https://archive.org/download/NIX-C-1987-11903/1987_11903L.jpg'
    text_url = 'https://github.com/psychoinformaticslab/pliers/blob/master/README.rst'
    stims = load_stims([text_file, video_url, audio_url, image_url, text_url])
    assert len(stims) == 5
    assert stims[1].fps == 30.0
    assert stims[3].data.shape == (288, 360, 3)
def _load_stim_models(dataset_name, task_name):
    """ Given a dataset and task, load all available stimuli as Pliers
    stimuli, and pair each with its original database stim object. """
    stim_models = Stimulus.query.filter_by(
        active=True).join(RunStimulus).join(Run).join(Task).filter_by(
            name=task_name).join(Dataset).filter_by(name=dataset_name)

    stims = []
    print("Loading stim models...")
    for stim_model in progressbar(stim_models):
        if stim_model.path is None:
            # Load both ways for text stimuli
            stims.append(
                (stim_model, ComplexTextStim(text=stim_model.content)))
            stims.append((stim_model, TextStim(text=stims[-1][1].data)))
        else:
            stims.append((stim_model, load_stims(stim_model.path)))

    return stims
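# A usage sketch for the _load_stim_models helper above. The dataset and task
# names are hypothetical; each entry in the returned list pairs a database
# Stimulus record with a loaded pliers stim.
def example_load_stim_models():
    stims = _load_stim_models('NaturalisticViewing', 'movie_watching')
    for stim_model, pliers_stim in stims:
        print(stim_model.id, type(pliers_stim).__name__)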
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output has
    changed. Store results in a CSV file for comparison.

    Args:
        transformers (list): A list of (name, parameters) tuples, where
            parameters is a dict of arguments to instantiate with (possibly
            empty).
        datastore (str): Filepath of CSV file with results. Stored in home
            dir by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Get transformers
    loaded_transformers = {get_transformer(name, **params): (name, params)
                           for name, params in transformers}

    # Transform stimuli
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers:
        for stim in stimuli:
            if trans._stim_matches_input_types(stim):
                res = trans.transform(stim)
                try:  # Add iterable
                    res = [r._data for r in res]
                except TypeError:
                    res = res._data

                res = hash_data(res) if isinstance(trans, (Converter, Filter)) \
                    else res[0][0]
                results["{}.{}".format(hash(trans), stim.name)] = [res]

    # Check for mismatches
    mismatches = []
    if prior_data is not None:
        last = prior_data[
            prior_data.time_extracted == prior_data.time_extracted.max()]. \
            iloc[0].drop('time_extracted')

        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            if old is not None:
                if isinstance(new, str):
                    if new != old:
                        mismatches.append(label)
                elif not np.isclose(old, new):
                    mismatches.append(label)

        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Get corresponding transformer name and parameters
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(hash(obj)) == hash_tr:
                return attr

    delta_t = {m.split('.')[0] for m in mismatches}
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}
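# A minimal sketch of calling check_updates. BrightnessExtractor and
# VibranceExtractor are real pliers image extractors; the battery and the
# datastore path are arbitrary choices for illustration.
def example_check_updates():
    transformers = [
        ('BrightnessExtractor', {}),
        ('VibranceExtractor', {}),
    ]
    delta = check_updates(transformers, datastore='/tmp/pliers_updates.csv')
    if delta['mismatches']:
        print('Transformers with changed output:', delta['transformers'])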
def convert_stimuli(dataset_name, task_name, converters):
    """ Convert stimuli to a different modality using pliers.

    Args:
        dataset_name (str): Dataset name.
        task_name (str): Task name.
        converters (list): A list of (name, parameters) tuples, where
            parameters is a dict of arguments to instantiate with.

    Returns:
        List of db ids of converted stimuli.
    """
    print("Converting stimuli")

    dataset_id = Dataset.query.filter_by(name=dataset_name).one().id

    converters = [get_transformer(n, **p) for n, p in converters]

    # Load all active original stimuli for task
    stim_objects = Stimulus.query.filter_by(active=True, parent_id=None).join(
        RunStimulus).join(Run).join(Task).filter_by(name=task_name).join(
            Dataset).filter_by(name=dataset_name)

    total_new_stims = []
    # Extract new stimuli from original stimuli
    for stim in stim_objects:
        new_stims = []
        # Re-create new RS associations with newly created stims
        rs_orig = RunStimulus.query.filter_by(stimulus_id=stim.id).join(
            Run).join(Task).filter_by(name=task_name)
        loaded_stim = load_stims(stim.path)

        # Extract for each converter
        for conv in converters:
            results = []
            # Extract and flatten results (to a single unit)
            if conv._stim_matches_input_types(loaded_stim):
                cstim = conv.transform(loaded_stim)
                try:  # Add iterable
                    results += cstim
                except TypeError:
                    if hasattr(cstim, 'elements'):
                        results += cstim.elements
                    else:
                        results.append(cstim)

                results = [res for res in results
                           if hasattr(res, 'data') and res.data != '']

                new_stims += create_new_stimuli(
                    dataset_id, task_name, stim.id, results, rs_orig,
                    transformer=cstim.history.transformer_class,
                    transformer_params=cstim.history.transformer_params)

                # De-activate previously generated stimuli from these
                # converters.
                update = Stimulus.query.filter_by(parent_id=stim.id).filter(
                    Stimulus.id.notin_(new_stims),
                    Stimulus.converter_name == cstim.history.transformer_class,
                    Stimulus.converter_parameters ==
                    cstim.history.transformer_params)
                if update.count():
                    update.update(dict(active=False),
                                  synchronize_session='fetch')

        db.session.commit()
        total_new_stims += new_stims

    return total_new_stims
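# A usage sketch for convert_stimuli. The dataset and task names are
# hypothetical; VideoToAudioConverter is a real pliers converter, passed by
# name with an (empty) parameter dict to match the expected input format.
def example_convert_stimuli():
    new_ids = convert_stimuli('NaturalisticViewing', 'movie_watching',
                              [('VideoToAudioConverter', {})])
    print('Created {} converted stimuli'.format(len(new_ids)))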
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output has
    changed. Store results in a CSV file for comparison.

    Args:
        transformers (list): A list of (name, parameters) tuples, where
            parameters is a dict of arguments to instantiate with (possibly
            empty).
        datastore (str): Filepath of CSV file with results. Stored in home
            dir by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Get transformers
    loaded_transformers = {get_transformer(name, **params): (name, params)
                           for name, params in transformers}

    # Transform stimuli
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers:
        for stim in stimuli:
            if trans._stim_matches_input_types(stim):
                res = trans.transform(stim)
                try:  # Add iterable
                    res = [getattr(r, '_data', r.data) for r in res]
                except TypeError:
                    res = getattr(res, '_data', res.data)
                res = hash_data(res)

                results["{}.{}".format(hash(trans), stim.name)] = [res]

    # Check for mismatches
    mismatches = []
    if prior_data is not None:
        last = prior_data[
            prior_data.time_extracted == prior_data.time_extracted.max()]. \
            iloc[0].drop('time_extracted')

        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            if old is not None:
                if isinstance(new, str):
                    if new != old:
                        mismatches.append(label)
                elif not np.isclose(old, new):
                    mismatches.append(label)

        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Get corresponding transformer name and parameters
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(hash(obj)) == hash_tr:
                return attr

    delta_t = {m.split('.')[0] for m in mismatches}
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}
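# A sketch of inspecting the datastore that check_updates writes. Column names
# follow the "<transformer hash>.<stim name>" convention used above; the path
# is the function's default, and pandas is assumed to be importable.
def example_inspect_datastore():
    import pandas as pd
    from os.path import expanduser

    history = pd.read_csv(expanduser('~/.pliers_updates'))
    latest = history[history.time_extracted == history.time_extracted.max()]
    # One row per transformer/stim pair after transposing
    print(latest.drop(columns='time_extracted').T)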