Пример #1
0
    def score(self, input_batch):
        """Score every generated single-position mutation of `input_batch`
        against the reference prediction.

        # Arguments
          input_batch: Input batch that should be scored.

        # Returns
          list: list with one entry per sample in `input_batch`. Every element
             of the list is a stacked list of depth D if the model input is
             D-dimensional with identical shape. Every entry of that list then
             contains the scores of the model output selected by
             `output_sel_fn`. Values are `None` if the input_batch already had
             a `1` at that position.
        """
        # Reference prediction on the unmodified batch.
        ref = self.model.predict_on_batch(input_batch)
        scores = []

        model_input_id = self.get_correct_model_input_id(self.model_input)
        # Hoist the sample count out of the loop header for readability.
        num_input_samples = get_model_input(
            input_batch, input_id=model_input_id).shape[0]
        for sample_i in range(num_input_samples):
            # get the full set of model inputs for the selected sample
            sample_set = get_dataset_item(input_batch, sample_i)

            # get the reference output for this sample
            ref_sample_pred = get_dataset_item(ref, sample_i)

            # Apply the output selection function if defined
            if self.output_sel_fn is not None:
                ref_sample_pred = self.output_sel_fn(ref_sample_pred)

            # get the one-hot encoded reference input array
            input_sample = get_model_input(sample_set, input_id=model_input_id)

            # Object array mirroring the input shape; positions for which no
            # mutation is generated keep the value None.
            score = np.empty(input_sample.shape, dtype=object)
            score[:] = None
            for alt_batch, alt_idxs in self._mutate_sample_batched(
                    input_sample):
                num_samples = len(alt_batch)
                # Replicate the full sample input set once per mutation and
                # swap in the mutated model input.
                mult_set = numpy_collate([sample_set] * num_samples)
                mult_set = set_model_input(mult_set,
                                           numpy_collate(alt_batch),
                                           input_id=model_input_id)
                alt = self.model.predict_on_batch(mult_set)
                for alt_sample_i in range(num_samples):
                    alt_sample = get_dataset_item(alt, alt_sample_i)
                    # Apply the output selection function if defined
                    if self.output_sel_fn is not None:
                        alt_sample = self.output_sel_fn(alt_sample)
                    # Apply scores across all model outputs for ref and alt
                    output_scores = [
                        apply_within(ref_sample_pred, alt_sample, scr)
                        for scr in self.scores
                    ]
                    # Idiomatic subscript assignment instead of calling
                    # __setitem__ explicitly (identical semantics).
                    score[alt_idxs[alt_sample_i]] = output_scores
            scores.append(score.tolist())

        return scores
Пример #2
0
def test_BatchDataset(data):
    # BatchDataset example:
    class MyBatchDataset(BatchDataset):
        """Minimal BatchDataset serving `data` in fixed-size batches."""

        def __init__(self, data, batch_size=3):
            self.data = data
            self.batch_size = batch_size

        def __len__(self):
            # number of batches, counting the final (possibly short) one
            n_total = self.data["targets"].shape[0]
            return int(np.ceil(n_total / self.batch_size))

        def __getitem__(self, idx):
            n_total = self.data["targets"].shape[0]
            lo = idx * self.batch_size
            hi = min(lo + self.batch_size, n_total)
            return get_dataset_item(self.data, np.arange(lo, hi))

    # ------------------------
    d = MyBatchDataset(data)

    compare_arrays(d.load_all(), data)
    batch = next(d.batch_iter())
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    # batch_train_iter
    d = MyBatchDataset(data, batch_size=2)
    train_it = d.batch_train_iter()
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #3
0
    def predict_all(self,
                    seq,
                    contrib_method='grad',
                    batch_size=512,
                    pred_summaries=None):
        """Run model prediction and (optionally) contribution scoring.

        # Arguments
          seq: input sequences, indexable per-sample with `get_dataset_item`.
          contrib_method: method forwarded to `contrib_score_all`
            (e.g. 'grad'). If None, contribution scoring is skipped.
          batch_size: batch size used for prediction and contribution scoring.
          pred_summaries: prediction summaries to compute contribution scores
            for. Defaults to ['profile/wn', 'counts/pre-act'].

        # Returns
          list of dicts, one per sample, with keys: seq, pred, contrib_score.
        """
        # Avoid a mutable default argument; resolve the historical default
        # here so call-site behavior is unchanged.
        if pred_summaries is None:
            pred_summaries = ['profile/wn', 'counts/pre-act']

        preds = self.predict(seq, batch_size=batch_size)

        if contrib_method is not None:
            contrib_scores = self.contrib_score_all(
                seq,
                method=contrib_method,
                aggregate_strand=True,
                batch_size=batch_size,
                pred_summaries=pred_summaries)
        else:
            # No contribution scoring requested; per-sample lookups below
            # index into an empty dict.
            contrib_scores = dict()

        out = [
            dict(
                seq=get_dataset_item(seq, i),
                # interval=regions[i],
                pred=get_dataset_item(preds, i),
                # TODO - shall we call it hyp_contrib score or contrib_score?
                contrib_score=get_dataset_item(contrib_scores, i),
            ) for i in range(len(seq))
        ]
        return out
Пример #4
0
def test_Dataset(data):
    # Dataset example:
    class MyDataset(Dataset):
        """Minimal per-sample Dataset backed by a nested data dict."""

        def __init__(self, data):
            self.data = data

        def __len__(self):
            return self.data["targets"].shape[0]

        def __getitem__(self, idx):
            return get_dataset_item(self.data, idx)

    # ------------------------

    d = MyDataset(data)

    compare_arrays(d.load_all(), data)
    batch = next(d.batch_iter(3))
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    # test batch_train_iter
    train_it = d.batch_train_iter(batch_size=2)
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #5
0
 def __next__(self):
     """Return the next batch of samples, advancing the internal cursor.

     Raises StopIteration once all samples have been consumed.
     """
     n_total = self.data["targets"].shape[0]
     batch_start = self.idx * self.batch_size
     if batch_start >= n_total:
         raise StopIteration
     batch_end = min(batch_start + self.batch_size, n_total)
     self.idx += 1
     return get_dataset_item(self.data, np.arange(batch_start, batch_end))
Пример #6
0
def test_PreloadedDataset(data):
    # PreloadedDataset example: wrap a zero-argument loader function.
    def data_fn():
        return data

    # ------------------------

    d = PreloadedDataset.from_fn(data_fn)()

    compare_arrays(d.load_all(), data)
    batch = next(d.batch_iter(3))
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    # test batch_train_iter
    train_it = d.batch_train_iter(batch_size=2)
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #7
0
def nested_numpy_minibatch(data, batch_size=1):
    """Yield `data` in minibatches of size `batch_size` along the first axis.

    # Arguments
      data: nested structure (dict / sequence / array) whose leaves share a
        common first-axis length, as reported by `get_dataset_lens`.
      batch_size: number of samples per yielded minibatch.

    # Yields
      the result of `get_dataset_item(data, idx)` for each index batch.
    """
    # The container ABCs were removed from the `collections` namespace in
    # Python 3.10; they live in `collections.abc`.
    from collections.abc import Mapping, Sequence

    lens = get_dataset_lens(data)
    # Normalize to a single length regardless of how lens is packaged.
    if isinstance(lens, Mapping):
        ln = list(lens.values())[0]
    elif isinstance(lens, Sequence):
        ln = lens[0]
    else:
        ln = lens

    for idx in BatchSampler(range(ln), batch_size=batch_size, drop_last=False):
        yield get_dataset_item(data, idx)
Пример #8
0
def test_get_item(data):
    """All leaves share length 3 and single-item access returns a nested dict."""
    lens = get_dataset_lens(data)
    assert lens[0] == 3
    # every leaf reports the same length
    assert len(set(lens)) == 1
    expected = {
        "a": [1],
        "b": {"d": 1},
        "c": np.array([1]),
    }
    assert get_dataset_item(data, 1) == expected
Пример #9
0
def test_BatchIterator(data):
    # BatchIterator example:
    class MyBatchIterator(BatchIterator):
        """Stateful iterator yielding fixed-size batches from `data`."""

        def __init__(self, data, batch_size):
            self.data = data
            self.batch_size = batch_size
            self.idx = 0

        def __iter__(self):
            # restart from the beginning on every fresh iteration
            self.idx = 0
            return self

        def __next__(self):
            n_total = self.data["targets"].shape[0]
            start = self.idx * self.batch_size
            if start >= n_total:
                raise StopIteration
            end = min(start + self.batch_size, n_total)
            self.idx += 1
            return get_dataset_item(self.data, np.arange(start, end))

        # Python 2 compatibility alias
        next = __next__

    # ------------------------

    d = MyBatchIterator(data, 3)

    compare_arrays(d.load_all(), data)
    d = MyBatchIterator(data, 3)
    batch = next(d.batch_iter())
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    # test batch_train_iter
    d = MyBatchIterator(data, 2)
    train_it = d.batch_train_iter()
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #10
0
def test_SampleGenerator(data):
    # SampleGenerator example:
    def generator_fn(data):
        """Yield one sample at a time from the nested data dict."""
        n_samples = data["targets"].shape[0]
        for i in range(n_samples):
            yield get_dataset_item(data, i)

    # ------------------------

    d = SampleGenerator.from_fn(generator_fn)(data)

    compare_arrays(d.load_all(), data)
    d = SampleGenerator.from_fn(generator_fn)(data)

    batch = next(d.batch_iter(batch_size=3))
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    d = SampleGenerator.from_fn(generator_fn)(data)
    train_it = d.batch_train_iter(batch_size=2)
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #11
0
def test_SampleIterator(data):
    # SampleIterator example:
    class MySampleIterator(SampleIterator):
        """Stateful iterator yielding individual samples from `data`."""

        def __init__(self, data):
            self.data = data
            self.idx = 0

        def __iter__(self):
            # restart on every fresh iteration
            self.idx = 0
            return self

        def __next__(self):
            n_total = self.data["targets"].shape[0]
            if self.idx >= n_total:
                raise StopIteration
            sample = get_dataset_item(self.data, self.idx)
            self.idx += 1
            return sample

        # Python 2 compatibility alias
        next = __next__

    # ------------------------

    d = MySampleIterator(data)

    compare_arrays(d.load_all(), data)
    d = MySampleIterator(data)
    batch = next(d.batch_iter(batch_size=3))
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    # train_iter
    d = MySampleIterator(data)
    train_it = d.batch_train_iter(batch_size=2)
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #12
0
def test_BatchGenerator(data):
    # BatchGenerator example:
    def generator_fn(data, batch_size):
        """Yield `data` in consecutive batches of `batch_size` samples."""
        n_total = data["targets"].shape[0]
        n_batches = int(np.ceil(n_total / batch_size))
        for b in range(n_batches):
            lo = b * batch_size
            hi = min(lo + batch_size, n_total)
            yield get_dataset_item(data, np.arange(lo, hi))

    # ------------------------

    d = BatchGenerator.from_fn(generator_fn)(data, 3)

    compare_arrays(d.load_all(), data)
    d = BatchGenerator.from_fn(generator_fn)(data, 3)

    batch = next(d.batch_iter())
    compare_arrays(batch, get_dataset_item(data, np.arange(3)))

    d = BatchGenerator.from_fn(generator_fn)(data, 2)
    train_it = d.batch_train_iter()
    x = y = None
    for _ in range(6):
        x, y = next(train_it)
    expected = get_dataset_item(data, np.arange(2))
    compare_arrays_x(x, expected['inputs'])
    compare_arrays_y(y, expected['targets'])
Пример #13
0
 def __getitem__(self, index):
     """Fetch the item at `index` from the underlying nested data."""
     item = get_dataset_item(self.data, index)
     return item
Пример #14
0
 def generator_fn(data):
     """Yield one sample at a time from the nested data dict."""
     n_samples = data["targets"].shape[0]
     for i in range(n_samples):
         yield get_dataset_item(data, i)
Пример #15
0
 def __next__(self):
     """Return the next sample, advancing the cursor.

     Raises StopIteration once all samples have been consumed.
     """
     n_total = self.data["targets"].shape[0]
     if self.idx >= n_total:
         raise StopIteration
     sample = get_dataset_item(self.data, self.idx)
     self.idx += 1
     return sample
Пример #16
0
 def generator_fn(data, batch_size):
     """Yield `data` in consecutive batches of `batch_size` samples."""
     n_total = data["targets"].shape[0]
     n_batches = int(np.ceil(n_total / batch_size))
     for b in range(n_batches):
         lo = b * batch_size
         hi = min(lo + batch_size, n_total)
         yield get_dataset_item(data, np.arange(lo, hi))
Пример #17
0
 def __getitem__(self, idx):
     """Return the idx-th batch as a slice along the first axis of the data."""
     lo = idx * self.batch_size
     hi = min(lo + self.batch_size, self.data["targets"].shape[0])
     return get_dataset_item(self.data, np.arange(lo, hi))