def contrib_score(self, x, name, method='grad', batch_size=512, preact_only=False):
    """Compute contribution scores for `x`.

    Args:
      x: model input. Either a list of inputs (used as-is), a dict keyed
        by input name, or a single array-like sequence input (described
        upstream as a one-hot encoded DNA sequence). In the last case the
        remaining inputs are filled in from `self.neutral_bias_inputs`.
      name: identifier forwarded to the scoring-function builder
        (selects which interpretation to compute).
      method: which contribution score to use. Available: grad or deeplift
      batch_size: if None, score everything in a single call; otherwise
        iterate over `nested_numpy_minibatch(x, batch_size=batch_size)`
        and merge the per-batch results with `numpy_collate_concat`.
      preact_only: forwarded unchanged to the scoring-function builder.

    Returns:
      The first element of the scoring function's output (merged across
      batches when `batch_size` is not None).

    Raises:
      ValueError: if `method` is neither "grad" nor "deeplift".
      AssertionError: if the model's first input is not named "seq".
    """
    # A bare (non-dict, non-list) input only carries the sequence, so wrap
    # it in a dict together with the neutral bias inputs.
    if not isinstance(x, (dict, list)):
        seq_length = x.shape[1]
        x = {'seq': x, **self.neutral_bias_inputs(len(x), seqlen=seq_length)}

    # Build the scoring function for the requested method.
    if method == "grad":
        score_fn = self._contrib_grad_fn(x, name, preact_only=preact_only)
    elif method == "deeplift":
        score_fn = self._contrib_deeplift_fn(name=name, preact_only=preact_only)
    else:
        raise ValueError(
            "Please provide a valid contribution scoring method: grad, deeplift"
        )

    input_names = self.model.input_names
    assert input_names[0] == "seq"

    def as_model_inputs(data):
        # Dicts are ordered by the model's input names; lists pass through
        # untouched; anything else becomes a one-element list.
        if isinstance(data, dict):
            return [data[key] for key in input_names]
        return data if isinstance(data, list) else [data]

    if batch_size is None:
        return score_fn(as_model_inputs(x))[0]

    per_batch_scores = [
        score_fn(as_model_inputs(batch))[0]
        for batch in nested_numpy_minibatch(x, batch_size=batch_size)
    ]
    return numpy_collate_concat(per_batch_scores)
def feature_importance(model, dataloader, importance_score, importance_score_kwargs=None,
                       batch_size=32, num_workers=0):
    """Return feature importance scores

    # Arguments
        model: kipoi model (obtained by `kipoi.get_model()`)
        dataloader: instantiated kipoi dataloader (obtained by
            `kipoi.get_dataloader_factory()(**dl_kwargs)` or
            `model.default_dataloader(**dl_kwargs)`)
        importance_score (`str` or `ImportanceScore`): which importance score to use
        importance_score_kwargs (dict or None): kwargs passed to the importance
            score. `None` (the default) means no extra kwargs.
        batch_size: run scoring and data-loading in batches
        num_workers: number of workers for parallel data-loading.
            Passed to `dataloader.batch_iter(...)`

    # Returns
        (dict of np.arrays): dataset returned by the dataloader
            (dict with keys `inputs`, `targets`, `metadata`) but with
            an additional `importance_scores` key

    # Raises
        ValueError: if the resolved importance score reports that it is
            not compatible with `model`.
    """
    # Use None as the default instead of a shared mutable `{}` so the default
    # can never leak state between calls; behaviour for callers is unchanged.
    if importance_score_kwargs is None:
        importance_score_kwargs = {}

    ImpScore = get_importance_score(importance_score)
    if not ImpScore.is_compatible(model):
        raise ValueError(
            "model not compatible with score: {0}".format(importance_score))
    impscore = ImpScore(model, **importance_score_kwargs)

    def append_key(d, k, v):
        # Adds the key to the batch dict in place and returns the same dict
        # so it can be used inside the comprehension below.
        d[k] = v
        return d

    # TODO - handle the reference-based importance scores...
    # Score each batch as it is loaded, attach the scores to the batch and
    # finally merge all batches into a single dataset.
    return numpy_collate_concat([
        append_key(batch, "importance_scores", impscore.score(batch['inputs']))
        for batch in tqdm(
            dataloader.batch_iter(batch_size=batch_size, num_workers=num_workers))
    ])
def concat(cls, objects):
    """Create a new `cls` instance from the concatenation of `objects`.

    `objects` is handed to `numpy_collate_concat` as-is and the merged
    result becomes the new instance's `data`; `attrs` is set to None.
    """
    merged = numpy_collate_concat(objects)
    return cls(data=merged, attrs=None)
def append(self, datax):
    """Append the data of `datax` to this dataset, in place.

    The object is re-initialised through the parent class' `__init__`
    with `data` set to the collated concatenation of `self.data` and
    `datax.data`, and `attrs` set to a deep copy of the current
    `self.attrs`. The return value is whatever that `__init__` call
    returns.
    """
    reinitialise = super().__init__
    combined = numpy_collate_concat([self.data, datax.data])
    attrs_copy = deepcopy(self.attrs)
    return reinitialise(data=combined, attrs=attrs_copy)