Example #1
 def _postprocess(self, stims, preds, tok, wds, ons, dur):
     preds = preds[0].numpy()[:, 1:-1, :]
     if self.return_softmax:
         preds = scipy.special.softmax(preds, axis=-1)
     out_idx = preds[0, self.mask_pos, :].argsort()[::-1]
     if self.top_n:
         sub_idx = out_idx[:self.top_n]
     elif self.target:
         sub_idx = self.tokenizer.convert_tokens_to_ids(self.target)
     elif self.threshold:
         sub_idx = np.where(preds[0, self.mask_pos, :] >= self.threshold)[0]
     else:
         sub_idx = out_idx
     out_idx = [idx for idx in out_idx if idx in sub_idx]
     feat = self.tokenizer.convert_ids_to_tokens(out_idx)
     feat = [
         # Capitalize only pure-ASCII tokens (len matches byte length);
         # multibyte tokens are passed through unchanged
         f.capitalize() if len(f) == len(f.encode()) else f for f in feat
     ]
     data = [listify(p) for p in preds[0, self.mask_pos, out_idx]]
     if self.return_masked_word:
         feat, data = self._return_masked_word(preds, feat, data)
     if self.return_input:
         data += [stims.name]
         feat += ['sequence']
     mask_ons = listify(stims.elements[self.mask_pos].onset)
     mask_dur = listify(stims.elements[self.mask_pos].duration)
     return data, feat, mask_ons, mask_dur
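
Every example below funnels scalar-or-list values through pliers' listify helper. A minimal sketch of its behavior (an assumption about pliers.utils; the real implementation may handle more cases):

    def listify(obj):
        # Pass lists/tuples through unchanged; wrap anything else in
        # a single-element list so callers can iterate uniformly.
        return obj if isinstance(obj, (list, tuple)) else [obj]
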
Example #2
    def __init__(self, functions=None, var_names=None,
                 subset_idx=None, **kwargs):
        functions = listify(functions)
        if var_names is not None:
            var_names = listify(var_names)
            if len(var_names) != len(functions):
                raise ValueError('Length of var_names must match number of '
                                 'functions')
        for idx, f in enumerate(functions):
            if isinstance(f, str):
                try:
                    f_mod, f_func = f.rsplit('.', 1)
                    functions[idx] = getattr(import_module(f_mod), f_func)
                except (ValueError, ImportError, AttributeError):
                    # Not a resolvable dotted path; fall back to
                    # evaluating the name (e.g. a builtin like 'len')
                    try:
                        functions[idx] = eval(f)
                    except Exception:
                        raise ValueError(f"{f} is not a valid function")
        if var_names is None:
            var_names = [f.__name__ for f in functions]
        self.var_names = var_names

        self.functions = functions
        self.kwargs = kwargs
        self.subset_idx = subset_idx
        super().__init__()
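
To illustrate the string-to-callable resolution above (pure stdlib; 'numpy.mean' is just an example input): a dotted name goes through import_module + getattr, while a bare name such as 'len' contains no '.', so rsplit raises ValueError and the constructor falls back to eval.

    from importlib import import_module

    f = 'numpy.mean'
    f_mod, f_func = f.rsplit('.', 1)              # ('numpy', 'mean')
    func = getattr(import_module(f_mod), f_func)  # numpy.mean
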
Example #3
 def _preprocess(self, stim):
     x = listify(stim.data)
     if self.preprocessor_url_or_path:
         preprocessor = hub.KerasLayer(self.preprocessor_url_or_path,
                                       **self.preprocessor_kwargs)
         x = preprocessor(x)
     return x
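
For context, a typical preprocessor handle here is a TF Hub text-preprocessing layer. A hedged usage sketch (the handle below is one published example, not something this class requires):

    import tensorflow_hub as hub

    # Tokenizes raw strings into the dict of ids/masks BERT expects.
    pre = hub.KerasLayer(
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3')
    tokens = pre(['hello world'])
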
Example #4
def get_converter(in_type, out_type, *args, **kwargs):
    ''' Scans the list of available Converters and returns an instantiation
    of the first one whose input and output types match those passed in.

    Args:
        in_type (type): The type of input the converter must have.
        out_type (type): The type of output the converter must have.
        args, kwargs: Optional positional and keyword arguments to pass onto
            matching Converter's initializer.
    '''
    convs = pliers.converters.__all__

    # If config includes default converters for this combination, try them
    # first
    out_type = listify(out_type)[::-1]
    default_convs = config.get_option('default_converters')

    for ot in out_type:
        conv_str = '%s->%s' % (in_type.__name__, ot.__name__)
        if conv_str in default_convs:
            convs = list(default_convs[conv_str]) + convs

    for name in convs:
        cls = getattr(pliers.converters, name)
        if not inspect.isclass(cls) or not issubclass(cls, Converter):
            continue

        available = cls.available if issubclass(cls,
                                                EnvironmentKeyMixin) else True
        if cls._input_type == in_type and cls._output_type in out_type \
                and available:
            conv = cls(*args, **kwargs)
            return conv

    return None
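
A hedged usage sketch (VideoStim and AudioStim are pliers stimulus classes; the file path is illustrative):

    from pliers.stimuli import AudioStim, VideoStim

    # Resolve the first registered VideoStim -> AudioStim converter.
    conv = get_converter(VideoStim, AudioStim)
    if conv is not None:
        audio = conv.transform(VideoStim('movie.mp4'))
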
Example #5
    def __init__(self, api_key=None, model='general-v1.3', min_value=None,
                 max_concepts=None, select_concepts=None, rate_limit=None,
                 batch_size=None):
        verify_dependencies(['clarifai_client'])
        if api_key is None:
            try:
                api_key = os.environ['CLARIFAI_API_KEY']
            except KeyError:
                raise ValueError("A valid Clarifai API API_KEY "
                                 "must be passed the first time a Clarifai "
                                 "extractor is initialized.")

        self.api_key = api_key
        try:
            self.api = clarifai_client.ClarifaiApp(api_key=api_key)
            self.model = self.api.models.get(model)
        except clarifai_client.ApiError as e:
            logging.warning(str(e))
            self.api = None
            self.model = None
        self.model_name = model
        self.min_value = min_value
        self.max_concepts = max_concepts
        self.select_concepts = select_concepts
        if select_concepts:
            select_concepts = listify(select_concepts)
            self.select_concepts = [clarifai_client.Concept(concept_name=n)
                                    for n in select_concepts]
        super(ClarifaiAPIExtractor, self).__init__(rate_limit=rate_limit)
Example #6
    def __init__(self,
                 api_key=None,
                 model='general-v1.3',
                 min_value=None,
                 max_concepts=None,
                 select_concepts=None,
                 rate_limit=None):
        verify_dependencies(['clarifai_client'])
        if api_key is None:
            try:
                api_key = os.environ['CLARIFAI_API_KEY']
            except KeyError:
                raise ValueError("A valid Clarifai API API_KEY "
                                 "must be passed the first time a Clarifai "
                                 "extractor is initialized.")

        self.api_key = api_key
        try:
            self.api = clarifai_client.ClarifaiApp(api_key=api_key)
            self.model = self.api.models.get(model)
        except clarifai_client.ApiError:
            self.api = None
            self.model = None
        self.model_name = model
        self.min_value = min_value
        self.max_concepts = max_concepts
        self.select_concepts = select_concepts
        if select_concepts:
            select_concepts = listify(select_concepts)
            self.select_concepts = [
                clarifai_client.Concept(concept_name=n)
                for n in select_concepts
            ]
        super(ClarifaiAPIExtractor, self).__init__(rate_limit=rate_limit)
Example #7
    def _transform(self, stim, *args, **kwargs):
        # Check if we are requesting faster than the rate limit,
        # if so, throttle by sleeping
        time_diff = time.time() - self._last_request_time
        if time_diff < self.rate_limit:
            time.sleep(self.rate_limit - time_diff)
        self._last_request_time = time.time()

        # Check if we are trying to transform a large amount of data
        self.transformed_stim_count += len(listify(stim))
        if not config.get_option('allow_large_jobs'):
            if not isiterable(stim) and stim.duration \
               and stim.duration > config.get_option('long_job'):
                raise ValueError("Attempted to run an API transformation "
                                 "on a stimulus of duration %f, aborting. "
                                 "To allow this transformation, set "
                                 "config option 'allow_large_jobs' to "
                                 "True." % stim.duration)

            if self.transformed_stim_count > config.get_option('large_job'):
                raise ValueError("Number of transformations using this %s "
                                 "would exceed %d, aborting further "
                                 "transformations. To allow, set config "
                                 "option 'allow_large_jobs' to True." %
                                 (self.__class__.__name__,
                                  config.get_option('large_job')))

        if config.get_option('api_key_validation') and not self.validate_keys():
            raise ValueError("Error running %s, a provided environment key "
                             "was invalid or unauthorized. Please check that "
                             "you have authorized credentials for accessing "
                             "the target API." % self.__class__.__name__)

        return super(APITransformer, self)._transform(stim, *args, **kwargs)
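
The throttling above is a plain sleep-based rate limiter. The same pattern in isolation (a generic sketch, not pliers' API):

    import time

    class Throttled:
        def __init__(self, rate_limit=1.0):
            self.rate_limit = rate_limit
            self._last_request_time = 0.0

        def request(self):
            # Sleep out whatever remains of the rate-limit window,
            # then record the new request time.
            elapsed = time.time() - self._last_request_time
            if elapsed < self.rate_limit:
                time.sleep(self.rate_limit - elapsed)
            self._last_request_time = time.time()
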
Example #8
 def _transform(self, stim, *args, **kwargs):
     new_stim = self._filter(stim, *args, **kwargs)
     if not isinstance(new_stim, self._input_type) and \
        not isinstance(listify(new_stim)[0], stim.__class__):
         raise ValueError("Filter must return a Stim of the same type as "
                          "its input.")
     return new_stim
Example #9
def get_transformer(name, base=None, *args, **kwargs):
    ''' Scans list of currently available Transformer classes and returns an
    instantiation of the first one whose name perfectly matches
    (case-insensitive).
    Args:
        name (str): The name of the transformer to retrieve. Case-insensitive;
            e.g., 'stftextractor' or 'CornerDetectionExtractor'.
        base (str, list): Optional name of transformer modules to search.
            Valid values are 'converters', 'extractors', and 'filters'.
        args, kwargs: Optional positional or keyword arguments to pass onto
            the Transformer.
    '''

    name = name.lower()

    # Default to searching all kinds of Transformers
    if base is None:
        base = ['extractors', 'converters', 'filters']

    base = listify(base)

    for b in base:
        importlib.import_module('pliers.%s' % b)
        mod = getattr(pliers, b)
        classes = getattr(mod, '__all__')
        for cls_name in classes:
            # name was already lowercased above
            if cls_name.lower() == name:
                cls = getattr(mod, cls_name)
                return cls(*args, **kwargs)

    raise KeyError("No transformer named '%s' found." % name)
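
A hedged usage sketch (STFTAudioExtractor and VideoToAudioConverter are existing pliers transformers; the names only need to match case-insensitively):

    ext = get_transformer('stftaudioextractor')
    conv = get_transformer('VideoToAudioConverter', base='converters')
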
Example #10
def get_converter(in_type, out_type, *args, **kwargs):
    ''' Scans the list of available Converters and returns an instantiation
    of the first one whose input and output types match those passed in.
    Args:
        in_type (type): The type of input the converter must have.
        out_type (type): The type of output the converter must have.
        args, kwargs: Optional positional and keyword arguments to pass onto
            matching Converter's initializer.
    '''
    convs = pliers.converters.__all__

    # If config includes default converters for this combination, try them first
    out_type = listify(out_type)[::-1]
    for ot in out_type:
        conv_str = '%s->%s' % (in_type.__name__, ot.__name__)
        if conv_str in config.default_converters:
            convs = list(config.default_converters[conv_str]) + convs

    for name in convs:
        cls = getattr(pliers.converters, name)
        if not issubclass(cls, Converter):
            continue

        if cls._input_type == in_type and cls._output_type in out_type \
                and cls.available:
            try:
                conv = cls(*args, **kwargs)
                return conv
            except ValueError:
                # Important for API converters
                pass

    return None
Example #11
    def _extract(self, stim):

        values = self._get_values(stim)

        if self._feature == 'beat_track':
            beats = np.array(values[1])
            values = beats

        values = values.T
        n_frames = len(values)

        feature_names = listify(self.get_feature_names())

        onsets = librosa.frames_to_time(range(n_frames),
                                        sr=stim.sampling_rate,
                                        hop_length=self.hop_length)

        onsets = onsets + stim.onset if stim.onset else onsets

        durations = [self.hop_length / float(stim.sampling_rate)] * n_frames

        return ExtractorResult(values,
                               stim,
                               self,
                               features=feature_names,
                               onsets=onsets,
                               durations=durations,
                               orders=list(range(n_frames)))
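
A worked check of the onset arithmetic above: librosa maps frame i to i * hop_length / sr seconds, so with the library defaults:

    import librosa

    librosa.frames_to_time([0, 1, 2], sr=22050, hop_length=512)
    # -> array([0.        , 0.02321995, 0.04643991])
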
Example #12
 def _extract(self, stim):
     values = self.func(stim.data)
     feature_names = listify(self.get_feature_names())
     return ExtractorResult(values,
                            stim,
                            self,
                            features=feature_names,
                            raw=values)
Example #13
 def _preprocess(self, stim):
     if self.transform_inp:
         return self.transform_inp(stim.data)
     else:
          if isinstance(stim, TextStim):
             return listify(stim.data)
         else:
             return stim.data
Example #14
    def _stim_matches_input_types(self, stim):
        # Checks if passed Stim meets all _input_type and _optional_input_type
        # specifications.

        mandatory = tuple(listify(self._input_type))
        optional = tuple(listify(self._optional_input_type))

        if isinstance(stim, CompoundStim):
            return stim.has_types(mandatory) or (
                not mandatory and stim.has_types(optional, False))

        if len(mandatory) > 1:
            # Parenthesize so % formats the whole message, not just
            # the last fragment
            msg = ("Transformer of class %s requires multiple mandatory "
                   "inputs, so the passed input Stim must be a CompoundStim"
                   "--which it isn't." % self.__class__.__name__)
            raise ValueError(msg)

        return isinstance(stim, mandatory) or (not mandatory
                                               and isinstance(stim, optional))
Example #15
 def _transform(self, stim, *args, **kwargs):
     stims = listify(stim)
     if all(self._stim_matches_input_types(s) for s in stims):
         result = super()._transform(stims, *args, **kwargs)
         if isiterable(stim):
             return result
         else:
             return result[0]
     else:
         return list(super()._iterate(stims, *args, **kwargs))
Example #16
 def has_types(self, types, all_=True):
     ''' Check whether the current component list matches all Stim types
     in the types argument.
     Args:
         types (Stim, list): a Stim class or iterable of Stim classes.
         all_ (bool): if True, all input types must match; if False, at least
             one input type must match.
     Returns:
         True if all passed types match at least one Stim in the component
         list, otherwise False.
     '''
     func = all if all_ else any
     return func([self.get_stim(t) for t in listify(types)])
Example #17
    def run_node(self, node, stim):

        if isinstance(node, string_types):
            node = self.nodes[node]

        result = node.transformer.transform(stim)
        if isinstance(node.transformer, Extractor):
            return listify(result)

        stim = result
        # If result is a generator, the first child will destroy the
        # iterable, so cache via list conversion
        if len(node.children) > 1 and isgenerator(stim):
            stim = list(stim)
        return list(chain(*[self.run_node(c, stim) for c in node.children]))
Example #18
    def has_types(self, types, all_=True):
        ''' Check whether the current component list matches all Stim types
        in the types argument.

        Args:
            types (Stim, list): a Stim class or iterable of Stim classes.
            all_ (bool): if True, all input types must match; if False, at
                least one input type must match.

        Returns:
            True if all passed types match at least one Stim in the component
            list, otherwise False.
        '''
        func = all if all_ else any
        return func([self.get_stim(t) for t in listify(types)])
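
A hedged usage sketch on a CompoundStim (constructor usage assumed from pliers' stimuli API; file paths illustrative):

    from pliers.stimuli import AudioStim, CompoundStim, ImageStim, TextStim

    stim = CompoundStim([TextStim(text='hello'), ImageStim('face.jpg')])
    stim.has_types([TextStim, ImageStim])              # True: all present
    stim.has_types([TextStim, AudioStim], all_=False)  # True: one present
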
Example #19
 def _postprocess(self, stims, preds, tok, wds, ons, dur):
     data = preds[0].numpy().squeeze()
     if self.return_softmax:
         data = scipy.special.softmax(data)
     data = [listify(d) for d in data.tolist()]
     tok = [' '.join(wds)]
      try:
          dur = ons[-1] + dur[-1] - ons[0]
      except (IndexError, TypeError):
          dur = None
     ons = ons[0]
     feat = ['sent_pos', 'sent_neg']
     if self.return_input:
         data += tok
         feat += ['sequence']
     return data, feat, ons, dur
Example #20
 def _extract(self, stim):
     values = self._get_values(stim)
     values = values.T
     feature_names = listify(self.get_feature_names())
     n_frames = len(values)
     onsets = librosa.frames_to_time(range(n_frames),
                                     sr=stim.sampling_rate,
                                     hop_length=self.hop_length)
     onsets = onsets + stim.onset if stim.onset else onsets
     durations = [self.hop_length / float(stim.sampling_rate)] * n_frames
     return ExtractorResult(values,
                            stim,
                            self,
                            features=feature_names,
                            onsets=onsets,
                            durations=durations)
Example #21
 def __init__(self,
              pretrained_model='bert-base-uncased',
              tokenizer='bert-base-uncased',
              framework='pt',
              mask='MASK',
              top_n=None,
              threshold=None,
              target=None,
              return_softmax=False,
              return_masked_word=False,
              return_input=False,
              model_kwargs=None,
              tokenizer_kwargs=None):
     if any([top_n and target, top_n and threshold, threshold and target]):
         raise ValueError('top_n, threshold and target arguments '
                          'are mutually exclusive')
      if not isinstance(mask, (int, str)):
          raise ValueError('Mask must be a string or an integer.')
     super(BertLMExtractor,
           self).__init__(pretrained_model=pretrained_model,
                          tokenizer=tokenizer,
                          framework=framework,
                          return_input=return_input,
                          model_class='AutoModelWithLMHead',
                          model_kwargs=model_kwargs,
                          tokenizer_kwargs=tokenizer_kwargs)
     self.target = listify(target)
     if self.target:
         missing = set(self.target) - set(self.tokenizer.vocab.keys())
         if missing:
             logging.warning(f'{missing} not in vocabulary. Dropping.')
         present = set(self.target) & set(self.tokenizer.vocab.keys())
         self.target = list(present)
          if not self.target:
             raise ValueError(
                 'No valid target token. Import transformers'
                 ' and run transformers.BertTokenizer.from_pretrained'
                 f'(\'{tokenizer}\').vocab.keys() to see available tokens')
     self.mask = mask
     self.top_n = top_n
     self.threshold = threshold
     self.return_softmax = return_softmax
     self.return_masked_word = return_masked_word
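
A hedged usage sketch matching the signature above (argument values are illustrative):

    # Score the top 10 candidates for the third token, replacing it
    # with [MASK], and return softmaxed probabilities.
    ext = BertLMExtractor(mask=2, top_n=10, return_softmax=True)
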
Example #22
    def run_node(self, node, stim):
        ''' Executes the Transformer at a specific node.

        Args:
            node (str, Node): If a string, the name of the Node in the current
                Graph. Otherwise the Node instance to execute.
            stim (str, stim, list): Any valid input to the Transformer stored
                at the target node.
        '''
        if isinstance(node, string_types):
            node = self.nodes[node]

        result = node.transformer.transform(stim)
        if node.is_leaf():
            return listify(result)

        stim = result
        # If result is a generator, the first child will destroy the
        # iterable, so cache via list conversion
        if len(node.children) > 1 and isgenerator(stim):
            stim = list(stim)
        return list(chain(*[self.run_node(c, stim) for c in node.children]))
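
A hedged driving sketch (graph construction and node addressing are assumptions about pliers' Graph API; the file path is illustrative):

    from pliers.graph import Graph
    from pliers.stimuli import AudioStim

    g = Graph(nodes=['STFTAudioExtractor'])
    results = g.run_node(g.roots[0], AudioStim('speech.wav'))
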
Example #23
    def _extract(self, stim):

        values = self._get_values(stim)

        if self._feature == 'beat_track':
            beats = np.array(values[1])
            values = beats

        values = values.T
        n_frames = len(values)

        feature_names = listify(self.get_feature_names())

        onsets = librosa.frames_to_time(range(n_frames),
                                        sr=stim.sampling_rate,
                                        hop_length=self.hop_length)

        onsets = onsets + stim.onset if stim.onset else onsets

        durations = [self.hop_length / float(stim.sampling_rate)] * n_frames

        return ExtractorResult(values, stim, self, features=feature_names,
                               onsets=onsets, durations=durations,
                               orders=list(range(n_frames)))
Example #24
 def _extract(self, stim):
     values = self.func(stim.data)
     feature_names = listify(self.get_feature_names())
     return ExtractorResult(values, stim, self, features=feature_names)
Example #25
 def env_keys(self):
     return listify(self._env_keys)
Example #26
 def _to_df(self, result):
     cols = listify(self._feature)
     return pd.DataFrame([[r] for r in result._data], columns=cols)
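
Standalone, the frame built above looks like this (feature name and values are assumptions for illustration):

    import pandas as pd

    pd.DataFrame([[r] for r in [0.5, 0.7]], columns=['brightness'])
    #    brightness
    # 0         0.5
    # 1         0.7
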
Example #27
 def get_feature_names(self, out):
     if self.features:
         return listify(self.features)
     else:
         return ['feature_' + str(i) for i in range(out.shape[-1])]
Example #28
 def _extract(self, stim):
     inp = self._preprocess(stim)
     out = self.model(inp)
     out = self._postprocess(out)
     features = self.get_feature_names(out)
     return ExtractorResult(listify(out), stim, self, features=features)