def score(self, input_batch):
    """Score every single-position mutation of each sample in `input_batch`.

    For every sample, the reference model prediction is compared against the
    prediction for each mutated variant produced by
    `self._mutate_sample_batched`, using every scoring function in
    `self.scores`.

    # Arguments
        input_batch: Input batch that should be scored.

    # Returns
        list: list of length `len(input_batch samples)`. Every element of the
            list is a stacked list of depth D if the model input is
            D-dimensional with identical shape. Every entry of that list then
            contains the scores of the model output selected by
            `output_sel_fn`. Values are `None` if the input_batch already had
            a `1` at that position (i.e. positions never mutated).
    """
    # Reference prediction on the unmodified batch.
    ref = self.model.predict_on_batch(input_batch)
    scores = []
    model_input_id = self.get_correct_model_input_id(self.model_input)
    for sample_i in range(
            get_model_input(input_batch, input_id=model_input_id).shape[0]):
        # get the full set of model inputs for the selected sample
        sample_set = get_dataset_item(input_batch, sample_i)
        # get the reference output for this sample
        ref_sample_pred = get_dataset_item(ref, sample_i)
        # Apply the output selection function if defined
        if self.output_sel_fn is not None:
            ref_sample_pred = self.output_sel_fn(ref_sample_pred)
        # get the one-hot encoded reference input array
        input_sample = get_model_input(sample_set, input_id=model_input_id)
        # where we keep the scores - scores are lists (ordered by diff
        # method of ndarrays, lists or dictionaries - whatever is returned
        # by the model.  Cells left as None were never mutated.
        score = np.empty(input_sample.shape, dtype=object)
        score[:] = None
        for alt_batch, alt_idxs in self._mutate_sample_batched(
                input_sample):
            num_samples = len(alt_batch)
            # Replicate the full sample so every mutated variant keeps the
            # other (non-mutated) model inputs intact.
            mult_set = numpy_collate([sample_set] * num_samples)
            mult_set = set_model_input(mult_set, numpy_collate(alt_batch),
                                       input_id=model_input_id)
            alt = self.model.predict_on_batch(mult_set)
            for alt_sample_i in range(num_samples):
                alt_sample = get_dataset_item(alt, alt_sample_i)
                # Apply the output selection function if defined
                if self.output_sel_fn is not None:
                    alt_sample = self.output_sel_fn(alt_sample)
                # Apply scores across all model outputs for ref and alt
                output_scores = [
                    apply_within(ref_sample_pred, alt_sample, scr)
                    for scr in self.scores
                ]
                # alt_idxs[alt_sample_i] addresses the mutated position
                # inside the (possibly multi-dimensional) score array.
                score.__setitem__(alt_idxs[alt_sample_i], output_scores)
        scores.append(score.tolist())
    return scores
def test_BatchDataset(data):
    """Exercise a minimal BatchDataset subclass: load_all, batch_iter and
    batch_train_iter."""
    class _MyBatchDataset(BatchDataset):
        def __init__(self, data, batch_size=3):
            self.data = data
            self.batch_size = batch_size

        def __len__(self):
            n = self.data["targets"].shape[0]
            return int(np.ceil(n / self.batch_size))

        def __getitem__(self, idx):
            n = self.data["targets"].shape[0]
            lo = idx * self.batch_size
            hi = min(lo + self.batch_size, n)
            return get_dataset_item(self.data, np.arange(lo, hi))

    # ------------------------
    ds = _MyBatchDataset(data)
    compare_arrays(ds.load_all(), data)
    batches = ds.batch_iter()
    compare_arrays(next(batches), get_dataset_item(data, np.arange(3)))

    # batch_train_iter
    ds = _MyBatchDataset(data, batch_size=2)
    train_it = ds.batch_train_iter()
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def predict_all(self, seq, contrib_method='grad', batch_size=512,
                pred_summaries=None):
    """Make model predictions and (optionally) contribution scores for `seq`.

    # Arguments
        seq: input sequences to predict on.
        contrib_method: contribution-score method passed to
            `contrib_score_all` (e.g. 'grad'). If `None`, contribution
            scores are skipped entirely.
        batch_size: batch size used for both prediction and contribution
            scoring.
        pred_summaries: prediction summaries to compute contribution scores
            for. Defaults to `['profile/wn', 'counts/pre-act']`.
            (Default expressed via `None` to avoid a mutable default
            argument.)

    # Returns
        list of dicts, one per sequence, with keys:
            seq: the input sequence,
            pred: the model prediction,
            contrib_score: the contribution scores (empty dict when
                `contrib_method` is None).
    """
    if pred_summaries is None:
        pred_summaries = ['profile/wn', 'counts/pre-act']
    preds = self.predict(seq, batch_size=batch_size)
    if contrib_method is not None:
        contrib_scores = self.contrib_score_all(
            seq,
            method=contrib_method,
            aggregate_strand=True,
            batch_size=batch_size,
            pred_summaries=pred_summaries)
    else:
        # No contribution method requested -> empty per-item lookups below.
        contrib_scores = dict()
    out = [
        dict(
            seq=get_dataset_item(seq, i),
            pred=get_dataset_item(preds, i),
            # TODO - shall we call it hyp_contrib score or contrib_score?
            contrib_score=get_dataset_item(contrib_scores, i),
        ) for i in range(len(seq))
    ]
    return out
def test_Dataset(data):
    """Exercise a minimal indexed Dataset subclass: load_all, batch_iter and
    batch_train_iter."""
    class _IndexedDataset(Dataset):
        def __init__(self, data):
            self.data = data

        def __len__(self):
            return self.data["targets"].shape[0]

        def __getitem__(self, idx):
            return get_dataset_item(self.data, idx)

    # ------------------------
    ds = _IndexedDataset(data)
    compare_arrays(ds.load_all(), data)
    batches = ds.batch_iter(3)
    compare_arrays(next(batches), get_dataset_item(data, np.arange(3)))

    # test batch_train_iter
    train_it = ds.batch_train_iter(batch_size=2)
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def __next__(self):
    """Return the next batch of the data; raise StopIteration when the
    underlying "targets" array is exhausted."""
    total = self.data["targets"].shape[0]
    begin = self.idx * self.batch_size
    if begin >= total:
        raise StopIteration
    stop = min(begin + self.batch_size, total)
    self.idx += 1
    return get_dataset_item(self.data, np.arange(begin, stop))
def test_PreloadedDataset(data):
    """Exercise PreloadedDataset.from_fn: load_all, batch_iter and
    batch_train_iter."""
    def make_data():
        return data

    # ------------------------
    ds = PreloadedDataset.from_fn(make_data)()
    compare_arrays(ds.load_all(), data)
    batches = ds.batch_iter(3)
    compare_arrays(next(batches), get_dataset_item(data, np.arange(3)))

    # test batch_train_iter
    train_it = ds.batch_train_iter(batch_size=2)
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def nested_numpy_minibatch(data, batch_size=1):
    """Yield `data` in minibatches of up to `batch_size` elements.

    Works on nested structures (dicts / sequences / arrays); the common
    length is taken from the first entry of `get_dataset_lens(data)`.

    # Arguments
        data: nested data structure understood by `get_dataset_lens` /
            `get_dataset_item`.
        batch_size: number of elements per yielded minibatch.

    # Yields
        Minibatch slices of `data` (last one may be smaller;
        `drop_last=False`).
    """
    # `collections.Mapping`/`collections.Sequence` aliases were removed in
    # Python 3.10 -> use collections.abc. Local import keeps the fix
    # self-contained.
    import collections.abc
    lens = get_dataset_lens(data)
    if isinstance(lens, collections.abc.Mapping):
        # Any entry works - all lengths are assumed equal.
        ln = next(iter(lens.values()))
    elif isinstance(lens, collections.abc.Sequence):
        ln = lens[0]
    else:
        ln = lens
    for idx in BatchSampler(range(ln),
                            batch_size=batch_size,
                            drop_last=False):
        yield get_dataset_item(data, idx)
def test_get_item(data):
    """get_dataset_item returns the matching nested slice at index 1 and all
    leaves share the same length."""
    lens = get_dataset_lens(data)
    assert lens[0] == 3
    # every nested leaf reports the same length
    assert len(set(lens)) == 1
    expected = {
        "a": [1],
        "b": {"d": 1},
        "c": np.array([1]),
    }
    assert get_dataset_item(data, 1) == expected
def test_BatchIterator(data):
    """Exercise a minimal BatchIterator subclass: load_all, batch_iter and
    batch_train_iter."""
    class _MyBatchIterator(BatchIterator):
        def __init__(self, data, batch_size):
            self.data = data
            self.batch_size = batch_size
            self.idx = 0

        def __iter__(self):
            self.idx = 0
            return self

        def __next__(self):
            total = self.data["targets"].shape[0]
            begin = self.idx * self.batch_size
            if begin >= total:
                raise StopIteration
            stop = min(begin + self.batch_size, total)
            self.idx += 1
            return get_dataset_item(self.data, np.arange(begin, stop))

        next = __next__  # py2-style alias kept for parity with the API

    # ------------------------
    compare_arrays(_MyBatchIterator(data, 3).load_all(), data)

    it = _MyBatchIterator(data, 3).batch_iter()
    compare_arrays(next(it), get_dataset_item(data, np.arange(3)))

    # test batch_train_iter
    train_it = _MyBatchIterator(data, 2).batch_train_iter()
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def test_SampleGenerator(data):
    """Exercise SampleGenerator.from_fn: load_all, batch_iter and
    batch_train_iter."""
    def sample_gen(data):
        n = data["targets"].shape[0]
        for i in range(n):
            yield get_dataset_item(data, i)

    # ------------------------
    compare_arrays(SampleGenerator.from_fn(sample_gen)(data).load_all(), data)

    it = SampleGenerator.from_fn(sample_gen)(data).batch_iter(batch_size=3)
    compare_arrays(next(it), get_dataset_item(data, np.arange(3)))

    train_it = SampleGenerator.from_fn(sample_gen)(data).batch_train_iter(
        batch_size=2)
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def test_SampleIterator(data):
    """Exercise a minimal SampleIterator subclass: load_all, batch_iter and
    batch_train_iter."""
    class _MySampleIterator(SampleIterator):
        def __init__(self, data):
            self.data = data
            self.idx = 0

        def __iter__(self):
            self.idx = 0
            return self

        def __next__(self):
            if self.idx >= self.data["targets"].shape[0]:
                raise StopIteration
            item = get_dataset_item(self.data, self.idx)
            self.idx += 1
            return item

        next = __next__  # py2-style alias kept for parity with the API

    # ------------------------
    compare_arrays(_MySampleIterator(data).load_all(), data)

    it = _MySampleIterator(data).batch_iter(batch_size=3)
    compare_arrays(next(it), get_dataset_item(data, np.arange(3)))

    # train_iter
    train_it = _MySampleIterator(data).batch_train_iter(batch_size=2)
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def test_BatchGenerator(data):
    """Exercise BatchGenerator.from_fn: load_all, batch_iter and
    batch_train_iter."""
    def batch_gen(data, batch_size):
        n = data["targets"].shape[0]
        for i in range(int(np.ceil(n / batch_size))):
            lo = i * batch_size
            hi = min(lo + batch_size, n)
            yield get_dataset_item(data, np.arange(lo, hi))

    # ------------------------
    compare_arrays(BatchGenerator.from_fn(batch_gen)(data, 3).load_all(), data)

    it = BatchGenerator.from_fn(batch_gen)(data, 3).batch_iter()
    compare_arrays(next(it), get_dataset_item(data, np.arange(3)))

    train_it = BatchGenerator.from_fn(batch_gen)(data, 2).batch_train_iter()
    for _ in range(6):
        x, y = next(train_it)
        head = get_dataset_item(data, np.arange(2))
        compare_arrays_x(x, head['inputs'])
        compare_arrays_y(y, head['targets'])
def __getitem__(self, index):
    """Return the nested-data element(s) of `self.data` at `index`."""
    item = get_dataset_item(self.data, index)
    return item
def generator_fn(data):
    """Yield the samples of `data` one at a time, in order."""
    n_samples = data["targets"].shape[0]
    for sample_idx in range(n_samples):
        yield get_dataset_item(data, sample_idx)
def __next__(self):
    """Return the next single sample; raise StopIteration at the end."""
    total = self.data["targets"].shape[0]
    if self.idx >= total:
        raise StopIteration
    sample = get_dataset_item(self.data, self.idx)
    self.idx += 1
    return sample
def generator_fn(data, batch_size):
    """Yield `data` in consecutive batches of up to `batch_size` samples."""
    total = data["targets"].shape[0]
    n_batches = int(np.ceil(total / batch_size))
    for batch_idx in range(n_batches):
        lo = batch_idx * batch_size
        hi = min(lo + batch_size, total)
        yield get_dataset_item(data, np.arange(lo, hi))
def __getitem__(self, idx):
    """Return batch `idx` as a slice of the underlying data (last batch may
    be shorter)."""
    total = self.data["targets"].shape[0]
    lo = idx * self.batch_size
    hi = min(lo + self.batch_size, total)
    return get_dataset_item(self.data, np.arange(lo, hi))