Пример #1
0
    def contrib_score(self,
                      x,
                      name,
                      method='grad',
                      batch_size=512,
                      preact_only=False):
        """Compute the contribution score

        Args:
          x: one-hot encoded DNA sequence. A dict or a list is used as given;
            any other value is stored under the `seq` key and merged with
            the output of `self.neutral_bias_inputs(...)`.
          name: which interpretation method to compute; forwarded to the
            scoring-function builder
          method: which contribution score to use. Available: grad or deeplift
          batch_size: if None, score everything in a single call; otherwise
            score minibatches of this size and concatenate the results
          preact_only: forwarded unchanged to the scoring-function builder

        Returns:
          The first element of the scoring function's output (concatenated
          across minibatches when `batch_size` is not None).

        Raises:
          ValueError: if `method` is neither "grad" nor "deeplift"
        """
        # A bare array carries no bias tracks, so add the neutral ones.
        if not isinstance(x, (dict, list)):
            n_positions = x.shape[1]
            neutral_bias = self.neutral_bias_inputs(len(x), seqlen=n_positions)
            x = {'seq': x, **neutral_bias}

        if method == "grad":
            score_fn = self._contrib_grad_fn(x, name, preact_only=preact_only)
        elif method == "deeplift":
            score_fn = self._contrib_deeplift_fn(name=name,
                                                 preact_only=preact_only)
        else:
            raise ValueError(
                "Please provide a valid contribution scoring method: grad, deeplift"
            )

        def as_input_list(names, inputs):
            # The scoring function takes a list ordered like the model inputs.
            if isinstance(inputs, list):
                return inputs
            if isinstance(inputs, dict):
                return [inputs[key] for key in names]
            return [inputs]

        input_names = self.model.input_names
        assert input_names[0] == "seq"

        if batch_size is None:
            return score_fn(as_input_list(input_names, x))[0]

        per_batch_scores = [
            score_fn(as_input_list(input_names, chunk))[0]
            for chunk in nested_numpy_minibatch(x, batch_size=batch_size)
        ]
        return numpy_collate_concat(per_batch_scores)
Пример #2
0
def feature_importance(model,
                       dataloader,
                       importance_score,
                       importance_score_kwargs=None,
                       batch_size=32,
                       num_workers=0):
    """Return feature importance scores

    # Arguments
        model: kipoi model (obtained by `kipoi.get_model()`)
        dataloader: instantiated kipoi dataloader (obtained by
           `kipoi.get_dataloader_factory()(**dl_kwargs)`
           or `model.default_dataloader(**dl_kwargs)`)
        importance_score (`str` or `ImportanceScore`): which importance score to use
        importance_score_kwargs (dict or None): kwargs passed to the importance
           score. `None` (the default) means no extra kwargs.
        batch_size: run scoring and data-loading in batches
        num_workers: number of workers for parallel data-loading. Passed to `dataloader.batch_iter(...)`

    # Returns
        (dict of np.arrays): dataset returned by the dataloader (dict with keys `inputs`, `targets`, `metadata`)
           but with an additional `importance_scores` key

    # Raises
        ValueError: if the resolved importance score reports that it is not
           compatible with `model`
    """
    # Use a None sentinel instead of a shared mutable `{}` default.
    if importance_score_kwargs is None:
        importance_score_kwargs = {}

    ImpScore = get_importance_score(importance_score)
    if not ImpScore.is_compatible(model):
        raise ValueError(
            "model not compatible with score: {0}".format(importance_score))
    impscore = ImpScore(model, **importance_score_kwargs)

    def append_key(d, k, v):
        # Stores the scores on the batch dict itself (in place) and returns it
        # so it can be used inside the comprehension below.
        d[k] = v
        return d

    # TODO - handle the reference-based importance scores...
    return numpy_collate_concat([
        append_key(batch, "importance_scores", impscore.score(batch['inputs']))
        for batch in tqdm(
            dataloader.batch_iter(batch_size=batch_size,
                                  num_workers=num_workers))
    ])
Пример #3
0
 def concat(cls, objects):
     """Build a new `cls` instance from the collated `objects`.

     `objects` is passed as-is to `numpy_collate_concat`; the result becomes
     the `data` of the new instance and `attrs` is set to None.
     """
     merged = numpy_collate_concat(objects)
     return cls(data=merged, attrs=None)
Пример #4
0
 def append(self, datax):
     """Append the data of `datax` to this dataset.

     The parent class initializer is called again on this instance with
     the collated `[self.data, datax.data]` and a deep copy of
     `self.attrs`. The return value is whatever that initializer returns
     (None for a regular `__init__`).
     """
     parent_init = super().__init__
     merged = numpy_collate_concat([self.data, datax.data])
     return parent_init(data=merged, attrs=deepcopy(self.attrs))