def __init__(self, hop_size=0.1, top_n=None, labels=None,
             weights_path=None, **yamnet_kwargs):
    ''' Import YAMNet, configure its parameters and select output labels.

    Args:
        hop_size (float): value assigned to the YAMNet parameter
            PATCH_HOP_SECONDS.
        top_n (int): stored on the instance unchanged. Mutually exclusive
            with labels.
        labels (list): display names of the labels to keep. Names that do
            not appear in the YAMNet class map are dropped with a warning.
            Mutually exclusive with top_n.
        weights_path (str): path to the model weights. Defaults to the
            'yamnet.h5' file located next to the imported yamnet module.
        yamnet_kwargs: extra parameter names and values, set as attributes
            on the YAMNet params object after hop_size has been applied
            (so they can override PATCH_HOP_SECONDS).

    Raises:
        MissingDependencyError: re-raised with setup instructions when
            importing or verifying yamnet fails with that error.
        ValueError: if both top_n and labels are truthy.
    '''
    verify_dependencies(['tensorflow'])
    try:
        # Make the yamnet checkout importable. Only add the entry once so
        # that repeated instantiation does not keep growing sys.path.
        yamnet_dir = str(YAMNET_PATH)
        if yamnet_dir not in sys.path:
            sys.path.insert(0, yamnet_dir)
        self.yamnet = attempt_to_import('yamnet')
        verify_dependencies(['yamnet'])
    except MissingDependencyError:
        msg = ('Yamnet could not be imported. To download and set up '
               'yamnet, run:\n\tpython -m pliers.support.setup_yamnet')
        raise MissingDependencyError(dependencies=None,
                                     custom_message=msg)

    if top_n and labels:
        raise ValueError('Top_n and labels are mutually exclusive '
                         'arguments. Reinstantiate the extractor setting '
                         'top_n or labels to None (or leaving it '
                         'unspecified).')

    # Class map and default weights are looked up next to the yamnet module.
    MODULE_PATH = path.dirname(self.yamnet.__file__)
    LABELS_PATH = path.join(MODULE_PATH, 'yamnet_class_map.csv')
    self.weights_path = weights_path or path.join(MODULE_PATH, 'yamnet.h5')

    self.hop_size = hop_size
    self.yamnet_kwargs = yamnet_kwargs or {}
    # NOTE(review): self.params is the object exposed by the imported yamnet
    # module, so the assignments below modify it in place -- presumably it
    # is shared by every instance of this extractor; confirm.
    self.params = self.yamnet.params
    self.params.PATCH_HOP_SECONDS = hop_size
    for par, v in self.yamnet_kwargs.items():
        setattr(self.params, par, v)
    if self.params.PATCH_WINDOW_SECONDS != 0.96:
        logging.warning(
            'Custom values for PATCH_WINDOW_SECONDS were '
            'passed. YAMNet was trained on windows of 0.96s. Different '
            'values might yield unreliable results.')

    self.top_n = top_n
    all_labels = pd.read_csv(LABELS_PATH)['display_name'].tolist()
    if labels is not None:
        requested = set(labels)
        missing = list(requested - set(all_labels))
        if missing:
            logging.warning(f'Labels {missing} do not exist. Dropping.')
        # Derive labels from the selected indices so that self.labels[k]
        # always names the class at self.label_idx[k]. Building the label
        # list from a set intersection would leave it in arbitrary order
        # (and possibly a different length) relative to label_idx.
        self.label_idx = [
            i for i, l in enumerate(all_labels) if l in requested
        ]
        self.labels = [all_labels[i] for i in self.label_idx]
    else:
        self.labels = all_labels
        self.label_idx = list(range(len(all_labels)))
    super(AudiosetLabelExtractor, self).__init__()
import base64 import os from pliers.transformers import Transformer, BatchTransformerMixin from pliers.utils import (EnvironmentKeyMixin, attempt_to_import, verify_dependencies) googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery']) oauth_client = attempt_to_import('oauth2client.client', 'oauth_client', ['GoogleCredentials']) DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}' class GoogleAPITransformer(Transformer, EnvironmentKeyMixin): _env_keys = 'GOOGLE_APPLICATION_CREDENTIALS' _log_attributes = ('handle_annotations',) def __init__(self, discovery_file=None, api_version='v1', max_results=100, num_retries=3, handle_annotations='prefix'): verify_dependencies(['googleapiclient', 'oauth_client']) if discovery_file is None: if 'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ: raise ValueError("No Google application credentials found. " "A JSON service account key must be either " "passed as the discovery_file argument, or " "set in the GOOGLE_APPLICATION_CREDENTIALS " "environment variable.") discovery_file = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
''' Stimuli that are inherently associated with remote resources. ''' import os from .base import load_stims from .compound import CompoundStim from .image import ImageStim from .text import TextStim from .video import VideoStim from pliers.utils import (APIDependent, attempt_to_import, verify_dependencies) twitter = attempt_to_import('twitter') class TweetStimFactory(APIDependent): ''' An object from which to generate TweetStims, creates an Api instance from the python-twitter library Args: consumer_key (str): A valid consumer key for the Twitter API consumer_secret (str): A valid consumer secret key for the Twitter API access_token_key (str): A valid access token for the Twitter API access_token_secret (str): A valid access token secret for the Twitter API To get these credentials, visit https://dev.twitter.com/. ''' _env_keys = ('TWITTER_CONSUMER_KEY', 'TWITTER_CONSUMER_SECRET', 'TWITTER_ACCESS_TOKEN_KEY', 'TWITTER_ACCESS_TOKEN_SECRET')
LibrosaFeatureExtractor, STFTAudioExtractor, MeanAmplitudeExtractor, SpectralCentroidExtractor, SpectralBandwidthExtractor, SpectralContrastExtractor, SpectralRolloffExtractor, PolyFeaturesExtractor, ZeroCrossingRateExtractor, ChromaSTFTExtractor, ChromaCQTExtractor, ChromaCENSExtractor, MelspectrogramExtractor, MFCCExtractor, TonnetzExtractor, TempogramExtractor, RMSExtractor, SpectralFlatnessExtractor, OnsetDetectExtractor, OnsetStrengthMultiExtractor, TempoExtractor, BeatTrackExtractor, HarmonicExtractor, PercussiveExtractor, AudiosetLabelExtractor) from pliers.stimuli import (ComplexTextStim, AudioStim, TranscribedAudioCompoundStim) from pliers.filters import AudioResamplingFilter from pliers.utils import attempt_to_import, verify_dependencies AUDIO_DIR = join(get_test_data_path(), 'audio') tf = attempt_to_import('tensorflow') def test_stft_extractor(): stim = AudioStim(join(AUDIO_DIR, 'barber.wav'), onset=4.2) ext = STFTAudioExtractor(frame_size=1., spectrogram=False, freq_bins=[(100, 300), (300, 3000), (3000, 20000)]) result = ext.transform(stim) df = result.to_df() assert df.shape == (557, 7) assert df['onset'][0] == 4.2 ext = STFTAudioExtractor(frame_size=1., spectrogram=False, freq_bins=5) result = ext.transform(stim)
from pliers.extractors import Extractor, merge_results
from pliers.transformers import get_transformer
from pliers.utils import attempt_to_import
from six import string_types

sklearn = attempt_to_import('sklearn')

# The parent class is chosen once, at import time, depending on whether the
# optional sklearn import produced a usable (truthy) module object.
if not sklearn:
    class SklearnBase():
        ''' Plain stand-in base used when sklearn is unavailable. '''
else:
    class SklearnBase(sklearn.base.TransformerMixin,
                      sklearn.base.BaseEstimator):
        ''' Base combining sklearn's TransformerMixin and BaseEstimator. '''


class PliersTransformer(SklearnBase):
    ''' Thin adapter that lets a pliers object take part in a sklearn
    workflow.

    Args:
        transformer (Graph, Transformer or str): the pliers object to run.
            A string is resolved through get_transformer; any other value
            is stored unchanged.
    '''

    def __init__(self, transformer):
        given_by_name = isinstance(transformer, string_types)
        self.transformer = (get_transformer(transformer) if given_by_name
                            else transformer)

    def fit(self, X, y=None):
        ''' No-op fit step: both arguments are ignored and the instance
        itself is returned. '''
        return self
''' Extractors that operate primarily or exclusively on Image stimuli. ''' from functools import partial import numpy as np import pandas as pd from pliers.stimuli.image import ImageStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies, listify from pliers.support.due import due, Url, Doi cv2 = attempt_to_import('cv2') face_recognition = attempt_to_import('face_recognition') class ImageExtractor(Extractor): ''' Base Image Extractor class; all subclasses can only be applied to images. ''' _input_type = ImageStim class BrightnessExtractor(ImageExtractor): ''' Gets the average luminosity of the pixels in the image ''' VERSION = '1.0' def _extract(self, stim): data = stim.data
''' The `graph` module contains tools for constructing and executing graphs of pliers Transformers. ''' from pliers.extractors.base import merge_results from pliers.stimuli import __all__ as stim_list from pliers.transformers import get_transformer from pliers.utils import (listify, flatten, isgenerator, attempt_to_import, verify_dependencies) from itertools import chain from six import string_types from collections import OrderedDict import json pgv = attempt_to_import('pygraphviz', 'pgv') stim_list.insert(0, 'ExtractorResult') class Node(object): ''' A graph node/vertex. Represents a single transformer, optionally with references to children. Args: name (str): Name of the node transformer (Transformer): the Transformer instance at this node parameters (kwargs): parameters for initializing the Transformer ''' def __init__(self, transformer, name=None, **parameters): self.name = name
''' The `graph` module contains tools for constructing and executing graphs of pliers Transformers. ''' from pliers.extractors.base import merge_results from pliers.stimuli import __all__ as stim_list from pliers.transformers import get_transformer from pliers.utils import (listify, flatten, isgenerator, attempt_to_import, verify_dependencies) from itertools import chain from six import string_types from collections import OrderedDict import json pgv = attempt_to_import('pygraphviz', 'pgv') stim_list.insert(0, 'ExtractorResult') class Node(object): ''' A graph node/vertex. Represents a single transformer, optionally with references to children. Args: name (str): Name of the node transformer (Transformer): the Transformer instance at this node parameters (kwargs): parameters for initializing the Transformer ''' def __init__(self, transformer, name=None, **parameters): self.name = name self.children = [] if isinstance(transformer, string_types):
''' Stimuli that are inherently associated with remote resources. ''' import logging import os from .base import load_stims from .compound import CompoundStim from .image import ImageStim from .text import TextStim from .video import VideoStim from pliers.utils import (APIDependent, attempt_to_import, verify_dependencies) twitter = attempt_to_import('twitter') class TweetStimFactory(APIDependent): ''' An object from which to generate TweetStims, creates an Api instance from the python-twitter library Args: consumer_key (str): A valid consumer key for the Twitter API consumer_secret (str): A valid consumer secret key for the Twitter API access_token_key (str): A valid access token for the Twitter API access_token_secret (str): A valid access token secret for the Twitter API To get these credentials, visit https://dev.twitter.com/. '''
''' Rev.ai API-based Converter classes ''' import logging import os import time from pliers.stimuli.text import TextStim, ComplexTextStim from pliers.utils import attempt_to_import, verify_dependencies from pliers.converters.audio import AudioToTextConverter from pliers.transformers.api import APITransformer rev_ai = attempt_to_import('rev_ai') rev_ai_client = attempt_to_import('rev_ai.apiclient', 'rev_ai_client', ['RevAiAPIClient']) class RevAISpeechAPIConverter(APITransformer, AudioToTextConverter): ''' Uses the Rev AI speech-to-text API to transcribe an audio file. Args: access_token (str): API credential access token. Must be passed explicitly or stored in the environment variable specified in the _env_keys field. timeout (int): Number of seconds to wait for audio transcription to finish. Defaults to 90 seconds. request_rate (int): Number of seconds to wait between polling the API for completion. ''' _env_keys = ('REVAI_ACCESS_TOKEN', ) _log_attributes = ('access_token', 'timeout', 'request_rate')
''' Extractor classes based on pre-trained models. ''' import numpy as np from PIL import Image from pliers.extractors.image import ImageExtractor from pliers.extractors.base import ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies tf = attempt_to_import('tensorflow') def _resize_image(image, shape): return np.array( Image.fromarray(image).resize(shape, resample=Image.BICUBIC)) class TensorFlowKerasInceptionV3Extractor(ImageExtractor): ''' Labels objects in images using a pretrained Inception V3 architecture implemented in TensorFlow / Keras. Images must be RGB and have shape (299, 299). Images will be resized (with some distortion) if the shape is different. Args: weights (str): URL to download pre-trained weights. If None (default), uses the pre-trained Inception V3 model (dated 2017-03-10) used in Keras Applications. num_predictions (int): Number of top predicted labels to retain for each image.
GoogleLanguageAPISentimentExtractor, GoogleLanguageAPISyntaxExtractor, GoogleLanguageAPITextCategoryExtractor, GoogleLanguageAPIEntitySentimentExtractor, ExtractorResult, merge_results) from pliers.extractors.api.google import GoogleVisionAPIExtractor from pliers.stimuli import ImageStim, VideoStim, TextStim from pliers.utils import attempt_to_import, verify_dependencies import pytest import json from os.path import join from ...utils import get_test_data_path import numpy as np googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery']) IMAGE_DIR = join(get_test_data_path(), 'image') VIDEO_DIR = join(get_test_data_path(), 'video') TEXT_DIR = join(get_test_data_path(), 'text') @pytest.mark.requires_payment @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ") def test_google_vision_api_extractor_inits(): ext = GoogleVisionAPIExtractor(num_retries=5) assert ext.num_retries == 5 assert ext.max_results == 100 assert ext.service is not None
''' Extractor classes based on pre-trained models. ''' import numpy as np import pandas as pd from pliers.extractors.image import ImageExtractor from pliers.extractors.base import Extractor, ExtractorResult from pliers.filters.image import ImageResizingFilter from pliers.stimuli import ImageStim, TextStim from pliers.stimuli.base import Stim from pliers.support.exceptions import MissingDependencyError from pliers.utils import (attempt_to_import, verify_dependencies, listify) import logging tf = attempt_to_import('tensorflow') hub = attempt_to_import('tensorflow_hub') attempt_to_import('tensorflow.keras') attempt_to_import('tensorflow_text') class TFHubExtractor(Extractor): ''' A generic class for Tensorflow Hub extractors Args: url_or_path (str): url or path to TFHub model. You can browse models at https://tfhub.dev/. features (optional): list of labels (for classification) or other feature names. The number of items must match the number of features in the output. For example, if a classification model with 1000 output classes is passed (e.g. EfficientNet B6,
''' Wit.ai API-based Converters ''' import logging import os from abc import abstractproperty from pliers.stimuli.text import ComplexTextStim from pliers.utils import attempt_to_import, verify_dependencies from pliers.converters.audio import AudioToTextConverter from pliers.transformers.api import APITransformer from six.moves.urllib.request import Request, urlopen from six.moves.urllib.error import HTTPError sr = attempt_to_import('speech_recognition', 'sr') class SpeechRecognitionAPIConverter(APITransformer, AudioToTextConverter): ''' Uses the SpeechRecognition API, which interacts with several APIs, like Google and Wit, to run speech-to-text transcription on an audio file. Args: api_key (str): API key. Must be passed explicitly or stored in the environment variable specified in the _env_keys field. rate_limit (int): The minimum number of seconds required between transform calls on this Transformer. ''' _log_attributes = ('api_key', 'recognize_method') VERSION = '1.0' @abstractproperty def recognize_method(self):
''' Classes that represent text or sequences of text. ''' import re import pandas as pd from six import string_types from six.moves.urllib.request import urlopen from pliers.support.decorators import requires_nltk_corpus from pliers.utils import attempt_to_import, verify_dependencies from .base import Stim pysrt = attempt_to_import('pysrt') class TextStim(Stim): ''' Any simple text stimulus--most commonly a single word. Args: filename (str): Path to input file, if one exists. text (str): Text value to store. If none is provided, value is read from filename. onset (float): Optional onset of the text presentation (in secs) with respect to some more general context or timeline the user wishes to keep track of. duration (float): Optional duration of the TextStim, in seconds. order (int): Optional sequential index of the TextStim within some broader context. url (str): Optional url to read contents from. ''' _default_file_extension = '.txt'
''' Extractor classes based on pre-trained models. ''' import numpy as np from pliers.extractors.image import ImageExtractor from pliers.extractors.base import ExtractorResult from pliers.filters.image import ImageResizingFilter from pliers.utils import attempt_to_import, verify_dependencies tf = attempt_to_import('tensorflow') attempt_to_import('tensorflow.keras') class TensorFlowKerasApplicationExtractor(ImageExtractor): ''' Labels objects in images using a pretrained Inception V3 architecture implemented in TensorFlow / Keras. Images must be RGB and be a certain shape. Different model architectures may require different shapes, and images will be resized (with some distortion) if the shape of the image is different. Args: architecture (str): model architecture to use. One of 'vgg16', 'vgg19', 'resnet50', 'inception_resnetv2', 'inceptionv3', 'xception', 'densenet121', 'densenet169', 'nasnetlarge', or 'nasnetmobile'. weights (str): URL to download pre-trained weights. If None (default), uses the pre-trained weights trained on ImageNet used in Keras Applications. num_predictions (int): Number of top predicted labels to retain for each image. '''
''' Extractors that operate primarily or exclusively on Video stimuli. ''' from pliers.stimuli.video import VideoStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies import numpy as np cv2 = attempt_to_import('cv2') class VideoExtractor(Extractor): ''' Base Video Extractor class; all subclasses can only be applied to video. ''' _input_type = VideoStim class FarnebackOpticalFlowExtractor(VideoExtractor): ''' Extracts total amount of dense optical flow between every pair of video frames. Args: pyr_scale (float): specifying the image scale (<1) to build pyramids for each image; pyr_scale=0.5 means a classical pyramid, where each next layer is twice smaller than the previous one. levels (int): number of pyramid layers including the initial image; levels=1 means that no extra layers are created and only the
#import tensorflow_hub as hub '''bert related helper code''' from pliers.extractors import bert_modeling from pliers.extractors import bert_tokenization from pliers.extractors import bert_extract_features '''skipthought related helper code ''' from pliers.extractors import skipthoughts '''SIF related helper code''' from pliers.extractors import sif_data_io, sif_params, SIF_embedding ''' import elmo related code from AllenAI''' from allennlp.commands.elmo import ElmoEmbedder embedding_methods = Enum('embedding_methods', 'average_embedding word2vec glove') keyedvectors = attempt_to_import('gensim.models.keyedvectors', 'keyedvectors', ['KeyedVectors']) doc2vecVectors = attempt_to_import('gensim.models.doc2vec', 'doc2vecVectors', ['Doc2Vec.load']) logging.getLogger('smart_open').setLevel(logging.ERROR) logger = logging.getLogger("text_encoding_logger") class DirectTextExtractorInterface(): ''' Args: method (str): The name of the embedding methods. The possibilities (averageembedding, doc2vec, sif...) will be provided to the users via a README. (default:averageembedding)
''' Filters that operate on VideoStim inputs. ''' import numpy as np from pliers.stimuli.video import VideoStim, VideoFrameCollectionStim from pliers.utils import attempt_to_import, verify_dependencies from .base import Filter, TemporalTrimmingFilter cv2 = attempt_to_import('cv2') class VideoFilter(Filter): ''' Base class for all VideoFilters. ''' _input_type = VideoStim class FrameSamplingFilter(Filter): ''' Samples frames from video stimuli, to improve efficiency. Args: every (int): takes every nth frame hertz (int): takes n frames per second top_n (int): takes top n frames sorted by the absolute difference with the next frame ''' _input_type = VideoFrameCollectionStim _log_attributes = ('every', 'hertz', 'top_n') VERSION = '1.0'
''' Rev.ai API-based Converter classes ''' import logging import os import time from pliers.stimuli.text import TextStim, ComplexTextStim from pliers.utils import attempt_to_import, verify_dependencies from pliers.converters.audio import AudioToTextConverter from pliers.transformers.api import APITransformer rev_ai = attempt_to_import('rev_ai') rev_ai_client = attempt_to_import('rev_ai.apiclient', 'rev_ai_client', ['RevAiAPIClient']) class RevAISpeechAPIConverter(APITransformer, AudioToTextConverter): ''' Uses the Rev AI speech-to-text API to transcribe an audio file. Args: access_token (str): API credential access token. Must be passed explicitly or stored in the environment variable specified in the _env_keys field. timeout (int): Number of seconds to wait for audio transcription to finish. Defaults to 90 seconds. request_rate (int): Number of seconds to wait between polling the API for completion. '''
''' Extractors that operate on AudioStim inputs. ''' from pliers.stimuli.audio import AudioStim from pliers.stimuli.text import ComplexTextStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies, listify import numpy as np from scipy import fft from abc import ABCMeta librosa = attempt_to_import('librosa') class AudioExtractor(Extractor): ''' Base Audio Extractor class; all subclasses can only be applied to audio. ''' _input_type = AudioStim class STFTAudioExtractor(AudioExtractor): ''' Short-time Fourier Transform extractor. Args: frame_size (float): The width of the frame/window to apply an FFT to, in seconds. hop_size (float): The step size to increment the window by on each iteration, in seconds (effectively, the sampling rate). freq_bins (list or int): The set of bins or frequency bands to extract power for. If an int is passed, this is the number of bins returned, with each bin spanning an equal range of frequencies.
''' Extractors that operate primarily or exclusively on Image stimuli. ''' from pliers.stimuli.image import ImageStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies, listify from pliers.support.due import due, Url, Doi import numpy as np import pandas as pd from functools import partial cv2 = attempt_to_import('cv2') face_recognition = attempt_to_import('face_recognition') class ImageExtractor(Extractor): ''' Base Image Extractor class; all subclasses can only be applied to images. ''' _input_type = ImageStim class BrightnessExtractor(ImageExtractor): ''' Gets the average luminosity of the pixels in the image ''' VERSION = '1.0' def _extract(self, stim):
import os try: from contextlib import ExitStack except Exception as e: from contextlib2 import ExitStack from pliers.extractors.image import ImageExtractor from pliers.extractors.base import ExtractorResult from pliers.transformers import BatchTransformerMixin from pliers.transformers.api import APITransformer from pliers.utils import listify, attempt_to_import, verify_dependencies import pandas as pd clarifai_client = attempt_to_import( 'clarifai.rest.client', 'clarifai_client', [ 'ClarifaiApp', 'Concept', 'ModelOutputConfig', 'ModelOutputInfo', 'Image' ]) class ClarifaiAPIExtractor(APITransformer, BatchTransformerMixin, ImageExtractor): ''' Uses the Clarifai API to extract tags of images. Args: api_key (str): A valid API_KEY for the Clarifai API. Only needs to be passed the first time the extractor is initialized. model (str): The name of the Clarifai model to use. If None, defaults to the general image tagger. min_value (float): A value between 0.0 and 1.0 indicating the minimum confidence required to return a prediction. Defaults to 0.0.
import logging import os from pliers.transformers import BatchTransformerMixin from pliers.transformers.api import APITransformer from pliers.utils import attempt_to_import, verify_dependencies googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery']) google_auth = attempt_to_import('google.oauth2', 'google_auth', fromlist=['service_account']) DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}' class GoogleAPITransformer(APITransformer): ''' Base GoogleAPITransformer class. Args: discovery_file (str): path to discovery file containing Google application credentials. api_version (str): API version to use. max_results (int): Max number of results per page. num_retries (int): Number of times to retry query on failure. rate_limit (int): The minimum number of seconds required between transform calls on this Transformer. ''' _env_keys = 'GOOGLE_APPLICATION_CREDENTIALS' _log_attributes = ('discovery_file', 'api_version')
''' Core transformer logic. ''' from pliers import config from pliers.stimuli.base import Stim, _log_transformation, load_stims from pliers.stimuli.compound import CompoundStim from pliers.utils import (progress_bar_wrapper, isiterable, isgenerator, listify, batch_iterable, attempt_to_import) import pliers from six import with_metaclass, string_types from abc import ABCMeta, abstractmethod, abstractproperty import importlib import logging multiprocessing = attempt_to_import('pathos.multiprocessing', 'multiprocessing', ['ProcessingPool']) _cache = {} class Transformer(with_metaclass(ABCMeta)): _log_attributes = () _loggable = True VERSION = '0.1' # Stim types that *can* be passed as input, but aren't mandatory. This # allows for disjunctive specification; e.g., if _input_type is empty # and _optional_input_type is (AudioStim, TextStim), then _at least_ one # of the two must be passed. If both are specified in _input_type, then # the input would have to be a CompoundStim with both audio and text slots. _optional_input_type = ()
try: from contextlib import ExitStack except Exception as e: from contextlib2 import ExitStack from pliers.extractors.image import ImageExtractor from pliers.extractors.video import VideoExtractor from pliers.extractors.base import ExtractorResult from pliers.transformers import BatchTransformerMixin from pliers.transformers.api import APITransformer from pliers.utils import listify, attempt_to_import, verify_dependencies import pandas as pd clarifai_client = attempt_to_import('clarifai.rest.client', 'clarifai_client', ['ClarifaiApp', 'Concept', 'ModelOutputConfig', 'ModelOutputInfo', 'Image', 'Video']) class ClarifaiAPIExtractor(APITransformer): ''' Uses the Clarifai API to extract tags of visual stimuli. Args: api_key (str): A valid API_KEY for the Clarifai API. Only needs to be passed the first time the extractor is initialized. model (str): The name of the Clarifai model to use. If None, defaults to the general image tagger. min_value (float): A value between 0.0 and 1.0 indicating the minimum
''' Extractors that interact with the AWS Rekognition API. '''

from pliers.stimuli.image import ImageStim
from pliers.extractors.base import Extractor, ExtractorResult
import pandas as pd
# NOTE(review): this unconditional import fails at module import time when
# boto3 is not installed, before verify_dependencies can report it.
import boto3
from pliers.utils import attempt_to_import, verify_dependencies

aws_rekognition_client = attempt_to_import('boto3')


# NOTE(review): the first base class is ImageStim (a stimulus type) rather
# than an extractor base -- looks unintended; confirm before relying on it.
class AwsRekognitionExtractor(ImageStim, Extractor):
    ''' Extractor holding a boto3 'rekognition' client.

    Args:
        profile_name (str): optional AWS profile name. When given, a
            boto3.Session is created for that profile and the client is
            created from that session.
        region_name (str): optional AWS region name passed to the client.
        extractor_type: stored on the instance unchanged.

    Attributes set here:
        session: the boto3.Session created for profile_name, or None when
            no profile was given.
        rekognition: the boto3 'rekognition' client.
        extractor_type: the value passed in.
    '''

    def __init__(self, profile_name=None, region_name=None,
                 extractor_type=None):
        verify_dependencies(['boto3'])
        if profile_name is not None:
            # client() is an instance method of Session: it has to be called
            # on the session built for this profile, not on the Session
            # class, otherwise the call fails and the profile is ignored.
            self.session = boto3.Session(profile_name=profile_name)
            self.rekognition = self.session.client(
                'rekognition', region_name=region_name)
        else:
            # No profile: use boto3's default session, but still honour an
            # explicitly requested region (None keeps boto3's default).
            self.session = None
            self.rekognition = boto3.client(
                'rekognition', region_name=region_name)
        self.extractor_type = extractor_type
        super(AwsRekognitionExtractor, self).__init__()
''' Converter classes that operate on ImageStim inputs. '''

from PIL import Image
from .base import Converter
from pliers.stimuli.image import ImageStim
from pliers.stimuli.text import TextStim
from pliers.utils import attempt_to_import, verify_dependencies

pytesseract = attempt_to_import('pytesseract')


class ImageToTextConverter(Converter):
    ''' Common parent for converters that take an ImageStim as input and
    produce a TextStim as output. '''

    _input_type = ImageStim
    _output_type = TextStim


class TesseractConverter(ImageToTextConverter):
    ''' Reads the text contained in an image by means of pytesseract. '''

    VERSION = '1.0'

    def _convert(self, stim):
        ''' Run pytesseract on the stim's data and wrap the result.

        Args:
            stim: input stimulus; its data, onset and duration attributes
                are read.

        Returns:
            TextStim holding the recognized text, with the onset and
            duration of the input stim.
        '''
        verify_dependencies(['pytesseract'])
        # pytesseract is handed a PIL image built from the stim's data.
        pil_image = Image.fromarray(stim.data)
        recognized = pytesseract.image_to_string(pil_image)
        return TextStim(text=recognized, onset=stim.onset,
                        duration=stim.duration)
from pliers.stimuli.text import TextStim, ComplexTextStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.support.exceptions import PliersError from pliers.support.decorators import requires_nltk_corpus from pliers.datasets.text import fetch_dictionary from pliers.transformers import BatchTransformerMixin from pliers.utils import attempt_to_import, verify_dependencies import numpy as np import pandas as pd import nltk from nltk.sentiment.vader import SentimentIntensityAnalyzer import sys from six import string_types keyedvectors = attempt_to_import('gensim.models.keyedvectors', 'keyedvectors', ['KeyedVectors']) sklearn_text = attempt_to_import('sklearn.feature_extraction.text', 'sklearn_text', ['VectorizerMixin', 'CountVectorizer']) class TextExtractor(Extractor): ''' Base Text Extractor class; all subclasses can only be applied to text. ''' _input_type = TextStim class ComplexTextExtractor(Extractor): ''' Base ComplexTextStim Extractor class; all subclasses can only be applied to ComplexTextStim instance. '''
import pandas as pd import numpy as np from pliers.utils import attempt_to_import, verify_dependencies import matplotlib.pyplot as plt from scipy.spatial.distance import mahalanobis from numpy.linalg import LinAlgError sns = attempt_to_import('seaborn') def correlation_matrix(df): ''' Returns a pandas DataFrame with the pair-wise correlations of the columns. Args: df: pandas DataFrame with columns to run diagnostics on ''' columns = df.columns.tolist() corr = pd.DataFrame( np.corrcoef(df, rowvar=0), columns=columns, index=columns) return corr def eigenvalues(df): ''' Returns a pandas Series with eigenvalues of the correlation matrix. Args: df: pandas DataFrame with columns to run diagnostics on '''
import numpy as np import pandas as pd import scipy import nltk from nltk.sentiment.vader import SentimentIntensityAnalyzer from pliers.stimuli.text import TextStim, ComplexTextStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.support.exceptions import PliersError from pliers.support.decorators import requires_nltk_corpus from pliers.datasets.text import fetch_dictionary from pliers.transformers import BatchTransformerMixin from pliers.utils import (attempt_to_import, verify_dependencies, flatten, listify) keyedvectors = attempt_to_import('gensim.models.keyedvectors', 'keyedvectors', ['KeyedVectors']) sklearn_text = attempt_to_import('sklearn.feature_extraction.text', 'sklearn_text', ['CountVectorizer']) spacy = attempt_to_import('spacy') transformers = attempt_to_import('transformers') class TextExtractor(Extractor): ''' Base Text Extractor class; all subclasses can only be applied to text. ''' _input_type = TextStim class ComplexTextExtractor(Extractor): ''' Base ComplexTextStim Extractor class; all subclasses can only be applied to ComplexTextStim instance.
''' Extractors that operate on AudioStim inputs. ''' from pliers.stimuli.audio import AudioStim from pliers.stimuli.text import ComplexTextStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies, listify import numpy as np from scipy import fft librosa = attempt_to_import('librosa') class AudioExtractor(Extractor): ''' Base Audio Extractor class; all subclasses can only be applied to audio. ''' _input_type = AudioStim class STFTAudioExtractor(AudioExtractor): ''' Short-time Fourier Transform extractor. Args: frame_size (float): The width of the frame/window to apply an FFT to, in seconds. hop_size (float): The step size to increment the window by on each iteration, in seconds (effectively, the sampling rate). freq_bins (list or int): The set of bins or frequency bands to extract power for. If an int is passed, this is the number of bins returned, with each bin spanning an equal range of frequencies. E.g., if bins=5 and the frequency spectrum runs from 0 to 20KHz, each bin will span 4KHz. If a list is passed, each element must be
''' Wit.ai API-based Converters ''' import logging import os from abc import abstractproperty from pliers.stimuli.text import ComplexTextStim from pliers.utils import attempt_to_import, verify_dependencies from pliers.converters.audio import AudioToTextConverter from pliers.transformers.api import APITransformer from six.moves.urllib.request import Request, urlopen from six.moves.urllib.error import HTTPError sr = attempt_to_import('speech_recognition', 'sr') class SpeechRecognitionAPIConverter(APITransformer, AudioToTextConverter): ''' Uses the SpeechRecognition API, which interacts with several APIs, like Google and Wit, to run speech-to-text transcription on an audio file. Args: api_key (str): API key. Must be passed explicitly or stored in the environment variable specified in the _env_keys field. rate_limit (int): The minimum number of seconds required between transform calls on this Transformer. ''' _log_attributes = ('api_key', 'recognize_method') VERSION = '1.0' @abstractproperty
''' Extractors that interact with the Indico API. ''' import os from pliers.extractors.image import ImageExtractor from pliers.extractors.text import TextExtractor from pliers.extractors.base import Extractor, ExtractorResult from pliers.transformers import BatchTransformerMixin from pliers.utils import (EnvironmentKeyMixin, attempt_to_import, verify_dependencies) import pandas as pd indicoio = attempt_to_import('indicoio') class IndicoAPIExtractor(BatchTransformerMixin, Extractor, EnvironmentKeyMixin): ''' Base class for all Indico API Extractors Args: api_key (str): A valid API key for the Indico API. Only needs to be passed the first time the extractor is initialized. models (list): The names of the Indico models to use. ''' _log_attributes = ('models', 'model_names') _input_type = () _batch_size = 20 _env_keys = 'INDICO_APP_KEY' VERSION = '1.0'
from os import path import sys import logging import numpy as np from scipy import fft import pandas as pd from pliers.stimuli.audio import AudioStim from pliers.stimuli.text import ComplexTextStim from pliers.extractors.base import Extractor, ExtractorResult from pliers.utils import attempt_to_import, verify_dependencies, listify from pliers.support.exceptions import MissingDependencyError from pliers.support.setup_yamnet import YAMNET_PATH librosa = attempt_to_import('librosa') tf = attempt_to_import('tensorflow') class AudioExtractor(Extractor): ''' Base Audio Extractor class; all subclasses can only be applied to audio. ''' _input_type = AudioStim class STFTAudioExtractor(AudioExtractor): ''' Short-time Fourier Transform extractor. Args: frame_size (float): The width of the frame/window to apply an FFT to, in seconds.
import pandas as pd import numpy as np from pliers.utils import attempt_to_import, verify_dependencies import matplotlib.pyplot as plt from scipy.spatial.distance import mahalanobis from numpy.linalg import LinAlgError sns = attempt_to_import('seaborn') def correlation_matrix(df): ''' Returns a pandas DataFrame with the pair-wise correlations of the columns. Args: df: pandas DataFrame with columns to run diagnostics on ''' columns = df.columns.tolist() corr = pd.DataFrame(np.corrcoef(df, rowvar=0), columns=columns, index=columns) return corr def eigenvalues(df): ''' Returns a pandas Series with eigenvalues of the correlation matrix. Args: df: pandas DataFrame with columns to run diagnostics on '''
''' Contains sklearn-compatible wrappers for pliers. ''' from pliers.extractors import Extractor, merge_results from pliers.transformers import get_transformer from pliers.utils import attempt_to_import from six import string_types sklearn = attempt_to_import('sklearn') if sklearn: class SklearnBase(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator): pass else: class SklearnBase(): pass class PliersTransformer(SklearnBase): ''' Simple wrapper for using pliers within a sklearn workflow. Args: transformer (Graph or Transformer): Pliers object to execute. Can either be a Graph with several transformers chained or a single transformer. ''' def __init__(self, transformer): if isinstance(transformer, string_types): self.transformer = get_transformer(transformer) else: