Пример #1
0
def time_delay_generator(x, y, delays, batch_size, weights=None, shuffle=True):
    '''Endlessly yield batches for fitting time-delay regression models,
    i.e. models where the value of y depends on past values of x.

    # Arguments
    x: input data, as a Numpy array or a list of Numpy arrays; the first
        axis of the first array defines the number of samples
    y: targets, as a Numpy array (indexed as y[ids, :]) or None for
        prediction generation
    delays: either an integer (expanded to the delays 0 .. delays-1) or an
        explicit sequence of delays
    batch_size: number of samples per batch, passed to _make_batches
    weights: optional 2D Numpy array of sample weights; only its first
        column is used
    shuffle: Whether or not to shuffle the data (set True for training)

    # Yields
    x_batch when y is None, otherwise the tuple (x_batch, y_batch, w_batch),
    each one passed through the Keras standardization helpers.

    # Example
    if X_train is (1000,200), Y_train is (1000,1)
    train_gen = time_delay_generator(X_train, Y_train, delays=10, batch_size=100)

    train_gen is a generator that gives:
    x_batch as size (100,10,200) since each of the 100 samples includes the input
    data at the current and nine previous time steps
    y_batch as size (100,1)
    w_batch as size (100,)

    '''

    # isinstance (rather than an exact type test) also accepts NumPy integer
    # scalars, e.g. a delay count computed from array shapes.
    if isinstance(delays, (int, np.integer)):
        delays = range(delays)

    if not isinstance(x, list):
        x = [x]
    n_samples = x[0].shape[0]
    index_array = np.arange(n_samples)

    # Indexing with the list of delayed ids puts the delay axis first; these
    # permutations swap it with the sample axis for every input.
    tlists = [[1, 0] + list(range(2, np.ndim(xx) + 1)) for xx in x]
    x_names = ['x_batch' + str(i) for i in range(1, len(x) + 1)]
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            # Clamp delayed indices into [0, n_samples - 1] so samples near
            # either end re-use the first/last row instead of wrapping.
            batch_ids_delay = [
                np.minimum(np.maximum(0, batch_ids - d), n_samples - 1)
                for d in delays
            ]
            x_batch = _standardize_input_data([
                xx[batch_ids_delay, :].transpose(tt)
                for xx, tt in zip(x, tlists)
            ], x_names)
            if y is None:
                yield x_batch
            else:
                y_batch = _standardize_input_data(y[batch_ids, :], ['y_batch'])
                if weights is not None:
                    w_batch = weights[batch_ids, :][:, 0]
                else:
                    w_batch = np.ones(x_batch[0].shape[0])
                # Samples earlier than the last delay get zero weight.
                # NOTE(review): this assumes the last entry of `delays` is the
                # largest one, which holds for range(delays) - confirm for
                # user-supplied sequences.
                w_batch[batch_ids < delays[-1]] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
Пример #2
0
def _standardize_user_data(model,
                           x,
                           y,
                           sample_weight=None,
                           class_weight=None,
                           check_batch_dim=True,
                           batch_size=None):
    if not hasattr(model, 'optimizer'):
        raise Exception('You must compile a model before training/testing.'
                        ' Use `model.compile(optimizer, loss)`.')

    output_shapes = []
    for output_shape, loss_fn in zip(model.internal_output_shapes,
                                     model.loss_functions):
        if loss_fn.__name__ == 'sparse_categorical_crossentropy':
            output_shapes.append(output_shape[:-1] + (1, ))
        elif getattr(losses, loss_fn.__name__, None) is None:
            output_shapes.append(None)
        else:
            output_shapes.append(output_shape)
    x = _standardize_input_data(x,
                                model.input_names,
                                model.internal_input_shapes,
                                exception_prefix='model input')
    y = _standardize_input_data(y,
                                model.output_names,
                                output_shapes,
                                exception_prefix='model target')
    sample_weights = _standardize_sample_weights(sample_weight,
                                                 model.output_names)
    class_weights = _standardize_class_weights(class_weight,
                                               model.output_names)
    sample_weights = [
        _standardize_weights(ref, sw, cw, mode) for (ref, sw, cw, mode) in zip(
            y, sample_weights, class_weights, model.sample_weight_modes)
    ]
    '''
    We only need to comment out check_array_lengeh(x, y, weights) in the next line to
    let the model compile and train.
    '''
    # check_array_lengths(x, y, sample_weights)

    _check_loss_and_target_compatibility(y, model.loss_functions,
                                         model.internal_output_shapes)
    if model.stateful and batch_size:
        if x[0].shape[0] % batch_size != 0:
            raise Exception('In a stateful network, '
                            'you should only pass inputs with '
                            'a number of samples that can be '
                            'divided by the batch size. Found: ' +
                            str(x[0].shape[0]) + ' samples')
    return x, y, sample_weights
Пример #3
0
def time_delay_generator_jitter(x, y, delays, batch_size, weights=None, shuffle=True, conv3d=False, jitter=True, jitter_axes=(3, 4), max_jitter=1):
    '''Endlessly yield batches for time-delay regression models, optionally
    applying a random circular shift ("jitter") to each input batch.

    # Arguments
    x: input data, as a Numpy array
    y: targets, as a Numpy array (indexed as y[ids, :]) or None for
        prediction generation
    delays: integer number of time-steps to include (delays 0 .. delays-1)
    batch_size: number of samples per batch, passed to _make_batches
    weights: optional 2D Numpy array of sample weights; only its first
        column is used
    shuffle: Whether or not to shuffle the data (set True for training)
    conv3d: if True the delay axis is moved to position 2 of the batch
        (batch, dim1, delays, ...); otherwise to position 1
        (batch, delays, ...)
    jitter: whether to roll the batch along `jitter_axes`
    jitter_axes: axes of the batch array that get rolled
    max_jitter: the roll for each axis is drawn uniformly from the integers
        -max_jitter .. max_jitter; one draw per axis is shared by the batch

    # Yields
    x_batch when y is None, otherwise (x_batch, y_batch, w_batch), each one
    passed through the Keras standardization helpers.

    '''
    n_samples = x.shape[0]
    index_array = np.arange(n_samples)
    # list(range(...)) is required: on Python 3 a range object cannot be
    # concatenated to a list.
    if conv3d:
        tlist = [1, 2, 0] + list(range(3, np.ndim(x) + 1))
    else:
        tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            # One index array per delay, clamped at 0 so the earliest samples
            # re-use the first row instead of wrapping around.
            delayed_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
            x_batch = x[delayed_ids, :].transpose(tlist)
            if jitter:
                for j in jitter_axes:
                    x_batch = np.roll(x_batch, np.random.randint(-max_jitter, max_jitter + 1), axis=j)
            x_batch = _standardize_input_data(x_batch, ['x_batch'])
            if y is None:
                yield x_batch
            else:
                # Delay 0 gives the (clamped) ids of the current samples.
                current_ids = delayed_ids[0]
                y_batch = _standardize_input_data(y[current_ids, :], ['y_batch'])
                if weights is not None:
                    w_batch = weights[current_ids, :][:, 0]
                else:
                    w_batch = np.ones(x_batch[0].shape[0])
                # Samples without a full history of `delays` steps get zero weight.
                w_batch[current_ids < delays] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
Пример #4
0
    def _input_grad(self,
                    x,
                    layer,
                    filter_slices=None,
                    filter_func=None,
                    filter_func_kwargs=None):
        """Evaluate the direct-saliency function of `layer` on one batch.

        Adapted from keras.engine.training.predict_on_batch.

        # Arguments
            x: Input samples, as a Numpy array.
            layer, filter_slices, filter_func, filter_func_kwargs: forwarded
                unchanged to `__generate_direct_saliency_functions__`.

        # Returns
            The outputs of the generated function (presumably input
            gradients - confirm against the generator); when there is exactly
            one output it is returned on its own instead of inside a list.
        """
        from keras.engine.training import _standardize_input_data
        from keras import backend as K

        feed = _standardize_input_data(x, self.model._feed_input_names,
                                       self.model._feed_input_shapes)
        # Append a learning-phase value of 0. only when the model uses the
        # learning phase and the backend phase is not a plain int.
        needs_phase = (self.model.uses_learning_phase
                       and not isinstance(K.learning_phase(), int))
        if needs_phase:
            feed = feed + [0.]

        saliency_fn = self.__generate_direct_saliency_functions__(
            layer, filter_slices, filter_func, filter_func_kwargs)
        results = saliency_fn(feed)
        return results[0] if len(results) == 1 else results
Пример #5
0
def time_delay_generator_conv(x, filt_length, frames_per_TR, TRs_in_model, y=None, weights=None):
    '''Endlessly yield overlapping windows of x for a convolutional
    time-delay model.

    # Arguments
    x: input data, as a Numpy array (indexed along its first axis)
    filt_length: filter length; contributes filt_length - 1 frames to each
        window and is forwarded to _make_batches_overlap
    frames_per_TR: number of frames per TR
    TRs_in_model: number of TRs covered by one window
    y: accepted but not used by this generator
    weights: accepted but not used by this generator

    # Yields
    x_batch: one window of x with a leading singleton axis, passed through
    _standardize_input_data. Indices beyond the end of x are clamped to the
    last row, so the final windows are padded by repeating it.

    The computed batch boundaries are printed once before iteration starts.

    '''
    window = frames_per_TR*TRs_in_model + filt_length - 1
    # Round the length up so the part after the first window is a whole
    # number of TRs.
    n_hops = np.ceil((x.shape[0] - window)/float(frames_per_TR))
    x_size_expand = int(n_hops*frames_per_TR + window)
    overlap = frames_per_TR*(TRs_in_model-1)+filt_length-1
    batches = _make_batches_overlap(x_size_expand, window, overlap, filt_length)
    print(batches)
    last_row = x.shape[0]-1
    index_array = np.minimum(np.arange(0, x_size_expand), last_row)
    while 1:
        for batch_start, batch_end in batches:
            window_ids = index_array[batch_start:batch_end]
            yield _standardize_input_data(x[window_ids, :][None, :], ['x_batch'])
Пример #6
0
def predict(self,
            x,
            batch_size=None,
            learning_phase=0.,
            verbose=0,
            steps=None):
    """Generates output predictions for the input samples.

        Computation is done in batches. This is a tweaked version that lets
        the caller choose the learning-phase value fed to the model.

        # Arguments
            x: the input data, as a Numpy array
                (or list of Numpy arrays).
            batch_size: integer; defaults to 32 when both `batch_size`
                and `steps` are None.
            learning_phase: value appended to the inputs when the model
                uses the learning phase.
            verbose: verbosity mode, 0 or 1.
            steps: only used for argument validation here; it is not
                forwarded to `_predict_loop`.

        # Returns
            Whatever `_predict_loop` returns (Numpy array(s) of predictions).

        # Raises
            ValueError: if `x` and `steps` are both None, or in case a
                stateful model receives more samples than the batch size
                and their number is not a multiple of it.
        """
    steps_missing = steps is None
    # Backwards compatibility.
    if steps_missing and batch_size is None:
        batch_size = 32
    if steps_missing and x is None:
        raise ValueError('If predicting from data tensors, '
                         'you should specify the `steps` '
                         'argument.')

    # validate user data
    x = _standardize_input_data(x,
                                self._feed_input_names,
                                self._feed_input_shapes,
                                check_batch_axis=False)
    if self.stateful:
        n_samples = x[0].shape[0]
        if n_samples > batch_size and n_samples % batch_size != 0:
            raise ValueError('In a stateful network, '
                             'you should only pass inputs with '
                             'a number of samples that can be '
                             'divided by the batch size. Found: ' +
                             str(n_samples) + ' samples. '
                             'Batch size: ' + str(batch_size) + '.')

    # prepare inputs, delegate logic to _predict_loop
    feed_phase = (self.uses_learning_phase
                  and not isinstance(K.learning_phase(), int))
    ins = x + [learning_phase] if feed_phase else x
    self._make_predict_function()
    return self._predict_loop(self.predict_function,
                              ins,
                              batch_size=batch_size,
                              verbose=verbose)
def standardize_predict_inputs(model: Model,
                               x: np.ndarray) -> List[np.ndarray]:
    """Build the input list fed to a prediction function of `model`.

    `x` is standardized against the model's feed input names and shapes.
    When the model uses the learning phase and the backend learning phase is
    not a plain int, a learning-phase value of 0. is appended.
    """
    feed = _standardize_input_data(x, model._feed_input_names,
                                   model._feed_input_shapes)
    if not model.uses_learning_phase or isinstance(K.learning_phase(), int):
        return feed
    return feed + [0.]
Пример #8
0
    def predict(self,
                X,
                X_tr=None,
                Y_tr=None,
                batch_size=32,
                return_var=False,
                verbose=0):
        """Generate output predictions for the input samples batch by batch.

        Arguments:
        ----------
            X : np.ndarray or list of np.ndarrays
            X_tr, Y_tr : optional training inputs/targets
                When both are given, the 'tr' data of every output GP layer
                is updated (and its grid, if the layer requests it) before
                predicting.
            batch_size : uint (default: 32)
            return_var : bool (default: False)
                Forwarded to each GP backend's `predict`.
            verbose : uint (default: 0)
                Accepted but not used by this method.

        Returns:
        --------
            preds : list
                One entry per output GP layer. When `return_var` is True the
                per-layer results are transposed into a list of lists
                (presumably [predictions, variances] - confirm against the
                GP backend).
        """
        # Update GP data if provided (and grid if necessary)
        if X_tr is not None and Y_tr is not None:
            X_tr, Y_tr, _ = self._standardize_user_data(X_tr,
                                                        Y_tr,
                                                        sample_weight=None,
                                                        class_weight=None,
                                                        check_batch_axis=False,
                                                        batch_size=batch_size)
            H_tr = self.transform(X_tr, batch_size=batch_size)
            for gp, h, y in zip(self.output_gp_layers, H_tr, Y_tr):
                gp.backend.update_data('tr', h, y)
                if gp.update_grid:
                    gp.backend.update_grid('tr')

        # Validate user data
        X = _standardize_input_data(X,
                                    self._feed_input_names,
                                    self._feed_input_shapes,
                                    check_batch_axis=False,
                                    exception_prefix='input')

        H = self.transform(X, batch_size=batch_size)

        preds = [
            gp.backend.predict(h, return_var=return_var)
            for gp, h in zip(self.output_gp_layers, H)
        ]

        if return_var:
            # Build real lists: on Python 3 `map(list, zip(...))` would hand
            # back a lazy one-shot iterator instead of the documented lists.
            preds = [list(group) for group in zip(*preds)]

        return preds
Пример #9
0
def time_delay_generator_AE(x, delays, batch_size, shuffle=True, conv3d=False):
    '''Endlessly yield (input, target) batches for a time-delay autoencoder,
    where the target is the flattened input batch itself.

    # Arguments
    x: input data, as a Numpy array
    delays: integer number of time-steps to include (delays 0 .. delays-1)
    batch_size: number of samples per batch, passed to _make_batches
    shuffle: Whether or not to shuffle the data (set True for training)
    conv3d: if True the delay axis is moved to position 2 of the batch
        (batch, dim1, delays, ...); otherwise to position 1
        (batch, delays, ...)

    # Yields
    (x_batch, y_batch), both passed through _standardize_input_data;
    y_batch is a copy of the input batch reshaped to (batch, -1).

    '''
    n_samples = x.shape[0]
    index_array = np.arange(n_samples)
    # list(range(...)) is required: on Python 3 a range object cannot be
    # concatenated to a list.
    if conv3d:
        tlist = [1, 2, 0] + list(range(3, np.ndim(x) + 1))
    else:
        tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            # One index array per delay, clamped at 0 so the earliest samples
            # re-use the first row instead of wrapping around.
            delayed_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
            x_batch = _standardize_input_data(x[delayed_ids, :].transpose(tlist),
                                              ['x_batch'])
            # The reconstruction target is the input, flattened per sample.
            y_batch = _standardize_input_data(
                np.copy(x_batch[0]).reshape((x_batch[0].shape[0], -1)),
                ['y_batch'])
            yield (x_batch, y_batch)
Пример #10
0
def generate_training_data(train_gen, batch_num):
    """Collect up to `batch_num` standardized batches from the generators.

    The generators in `train_gen` are cycled through repeatedly until
    `batch_num` batches have been collected.

    # Arguments
        train_gen: mapping of tag -> object exposing `get_batch_generator()`,
            which yields (input_data, y_true_value) pairs.
        batch_num: number of batches to collect.

    # Returns
        The tuple (zoo_input_data, zoo_label): the standardized
        ['query', 'doc'] inputs of each batch, and the labels of each batch
        with an extra axis inserted at position 1.

    If a complete pass over all generators produces no batch at all (for
    example when `train_gen` is empty), the batches collected so far are
    returned instead of looping forever.
    """
    zoo_input_data = []
    zoo_label = []
    count = 0
    while True:
        produced_any = False
        for generator in train_gen.values():
            for input_data, y_true_value in generator.get_batch_generator():
                produced_any = True
                count += 1
                if count > batch_num:
                    return (zoo_input_data, zoo_label)
                names = ['query', 'doc']
                shapes = [(None, 10), (None, 40)]
                zoo_input_data.append(
                    _standardize_input_data(input_data,
                                            names,
                                            shapes,
                                            check_batch_axis=False))
                zoo_label.append(np.expand_dims(y_true_value, 1))
        if not produced_any:
            # No generator yielded anything: another pass cannot make
            # progress, so stop rather than spin.
            return (zoo_input_data, zoo_label)
Пример #11
0
def eval(eval_gen, eval_metrics, zmodel):
    """Run every evaluation generator through `zmodel` and print the metrics.

    # Arguments
        eval_gen: mapping of tag -> generator object exposing
            `get_batch_generator()` and `reset()`.
        eval_metrics: mapping of metric name -> callable accepting the
            keyword arguments `y_true` and `y_pred`.
        zmodel: model exposing `forward(inputs)`.

    For each tag one line is printed with the average of every metric. For
    list generators the metrics are computed per list (delimited by
    consecutive entries of `input_data['list_counts']`) and averaged over the
    lists; otherwise they are computed per batch and averaged over batches.
    """
    for tag, generator in eval_gen.items():
        genfun = generator.get_batch_generator()

        stamp = time.strftime('%m-%d-%Y %H:%M:%S',
                              time.localtime(time.time()))
        print('[%s]\t[Eval:%s] ' % (stamp, tag), end='')

        res = {k: 0. for k in eval_metrics.keys()}
        num_valid = 0
        for input_data, y_true in genfun:
            names = ['query', 'doc']
            shapes = [(None, 10), (None, 40)]
            list_input_data = _standardize_input_data(input_data,
                                                      names,
                                                      shapes,
                                                      check_batch_axis=False)

            # The model takes query and doc joined along axis 1.
            query_part, doc_part = list_input_data[0], list_input_data[1]
            preprocessed_input_data = np.concatenate((query_part, doc_part),
                                                     axis=1)
            y_pred = zmodel.forward(preprocessed_input_data)

            is_list_generator = issubclass(
                type(generator), inputs.list_generator.ListBasicGenerator)
            if not is_list_generator:
                for k, eval_func in eval_metrics.items():
                    res[k] += eval_func(y_true=y_true, y_pred=y_pred)
                num_valid += 1
            else:
                list_counts = input_data['list_counts']
                n_lists = len(list_counts) - 1
                for k, eval_func in eval_metrics.items():
                    for lc_idx in range(n_lists):
                        pre, suf = list_counts[lc_idx], list_counts[lc_idx + 1]
                        res[k] += eval_func(y_true=y_true[pre:suf],
                                            y_pred=y_pred[pre:suf])
                num_valid += n_lists
        generator.reset()

        summary = '\t'.join(
            ['%s=%f' % (k, v / num_valid) for k, v in res.items()])
        print('Iter:%d\t%s' % (0, summary), end='\n')
        sys.stdout.flush()
Пример #12
0
def predict(self, x, batch_size=32, learning_phase=0., verbose=0):
    """Generates output predictions for the input samples.

        Computation is done in batches.

        # Arguments
            x: the input data, as a Numpy array
                (or list of Numpy arrays).
            batch_size: integer.
            learning_phase: value appended to the inputs when the model
                uses the learning phase.
            verbose: verbosity mode, 0 or 1.

        # Returns
            Whatever `_predict_loop` returns (Numpy array(s) of predictions).

        # Raises
            ValueError: in case a stateful model receives more samples than
                the batch size and their number is not a multiple of it.
        """
    # validate user data
    x = _standardize_input_data(x,
                                self._feed_input_names,
                                self._feed_input_shapes,
                                check_batch_axis=False)
    if self.stateful:
        n_samples = x[0].shape[0]
        if n_samples > batch_size and n_samples % batch_size != 0:
            raise ValueError('In a stateful network, '
                             'you should only pass inputs with '
                             'a number of samples that can be '
                             'divided by the batch size. Found: ' +
                             str(n_samples) + ' samples. '
                             'Batch size: ' + str(batch_size) + '.')

    # prepare inputs, delegate logic to _predict_loop
    feed_phase = (self.uses_learning_phase
                  and not isinstance(K.learning_phase(), int))
    ins = x + [learning_phase] if feed_phase else x
    self._make_predict_function()
    return self._predict_loop(self.predict_function,
                              ins,
                              batch_size=batch_size,
                              verbose=verbose)
Пример #13
0
    def finetune(self, X, Y, batch_size=32, gp_n_iter=1, verbose=1):
        """Finetune the output GP layers assuming the network is pre-trained.

        Arguments:
        ----------
            X : np.ndarray or list of np.ndarrays
            Y : np.ndarray or list of np.ndarrays
                Iterated in step with the output GP layers, one target per
                layer.
            batch_size : uint (default: 32)
                Batch size used for data streaming through the network.
            gp_n_iter : uint (default: 1)
                Number of iterations for GP training.
            verbose : uint (default: 1)
                Verbosity mode, 0 or 1.
        """
        # Validate user data
        X = _standardize_input_data(X,
                                    self._feed_input_names,
                                    self._feed_input_shapes,
                                    check_batch_axis=False,
                                    exception_prefix='input')

        features = self.transform(X, batch_size=batch_size)

        if verbose:
            print("Finetuning output GPs...")

        for gp_layer, feats, targets in zip(self.output_gp_layers, features, Y):
            # Refresh the GP training data (and grid if requested), then
            # train and store the returned hyperparameters.
            gp_layer.backend.update_data('tr', feats, targets)
            if gp_layer.update_grid:
                gp_layer.backend.update_grid('tr')
            gp_layer.hyp = gp_layer.backend.train(gp_n_iter, verbose=verbose)

        if verbose:
            print("Done.")
    def keras_generator(self, delays=7, batch_size=400, cell=0, scale=5, flatten=True, center=None, crop_size=None, shuffle=True, color_chan=False, log_transform_events=True, correct_eye_pos=False, gaussian_filter=0):
        """Endlessly yield (x_batch, y_batch, w_batch) batches of delayed
        stimulus frames, events and sample weights.

        # Arguments
            delays: integer (expanded to the delays 0 .. delays-1) or an
                explicit sequence of delays.
            batch_size: number of samples per batch.
            cell: integer or list of indices selecting rows of `events`.
            scale: multiplier applied to the selected events.
            flatten: reshape every stimulus frame to a vector.
            center, crop_size: when both are given, the stimulus is cropped
                to a `crop_size` window around `center` along axes 1 and 2.
            shuffle: whether to shuffle sample order on every pass.
            color_chan: insert a singleton axis at position 1 of the stimulus.
            log_transform_events: replace events by log(1 + events).
            correct_eye_pos: paste each frame into a larger zero canvas at
                the offset given by its shift; samples whose shift is NaN
                (and the samples `delays` steps after them) get zero weight.
            gaussian_filter: when > 0, passed as the second argument to
                `gaussian_filter1d` applied to the events (presumably the
                kernel sigma - confirm).

        # Yields
            (x_batch, y_batch, w_batch), each passed through the Keras
            standardization helpers.
        """
        from keras.engine.training import _standardize_input_data, _make_batches, _standardize_sample_weights

        # isinstance (rather than an exact type test) also accepts NumPy
        # integer scalars.
        if isinstance(cell, (int, np.integer)):
            cell = [cell]

        if isinstance(delays, (int, np.integer)):
            delays = range(delays)

        (stim, events, frame_numbers, weights, shifts) = self.vectorize_data(delays)

        evidx = np.where(events)[0]
        print(str(len(frame_numbers)) + ' Samples')
        print(str(len(evidx)) + ' Events')

        if correct_eye_pos:
            sh = stim.shape
            # Canvas large enough for the biggest shift in either direction,
            # plus a little slack.
            shift_stim_shape = (len(shifts),
                                sh[1] + 2*np.maximum(self.min_max_shift[1][0], -self.min_max_shift[0][0]) + 3,
                                sh[2] + 2*np.maximum(self.min_max_shift[1][1], -self.min_max_shift[0][1]) + 3)

            out_stim = np.zeros(shift_stim_shape, dtype='float32')

            # Floor division keeps every offset integral; true division
            # yields floats on Python 3, which are invalid slice bounds.
            shifts = shifts + [shift_stim_shape[1]//2, shift_stim_shape[2]//2]
            good_shift_locations = ~np.isnan(shifts[:, 0])
            bad_shift_ids = np.where(np.isnan(shifts[:, 0]))[0]
            for dd in delays:
                # A bad shift also spoils the samples that look back at it.
                weights[np.minimum(bad_shift_ids + dd, len(weights)-1)] = 0

            for i in range(len(shifts)):
                if good_shift_locations[i]:
                    row = np.int32(shifts[i, 0])
                    col = np.int32(shifts[i, 1])
                    out_stim[i, -sh[1]//2 + row:row + sh[1]//2,
                                -sh[2]//2 + col:col + sh[2]//2] = stim[frame_numbers[i]]

            stim = out_stim
            # The shifted stimulus holds one frame per sample.
            frame_numbers_i = np.arange(len(frame_numbers))
        else:
            frame_numbers_i = frame_numbers

        if color_chan:
            stim = stim[:, None, :, :]

        if crop_size is not None and center is not None:
            half_crop = crop_size//2
            stim = stim[:, (center[0]-half_crop):(center[0]+half_crop), (center[1]-half_crop):(center[1]+half_crop)]

        if flatten:
            stim = stim.reshape(stim.shape[0], -1)

        events = np.asarray(events)
        events = events[cell].T * scale

        if log_transform_events:
            events = np.log(1 + events)

        if gaussian_filter > 0:
            events = gaussian_filter1d(events, gaussian_filter)

        index_array = np.arange(events.shape[0])

        # Indexing with the list of delayed ids puts the delay axis first;
        # this permutation swaps it with the sample axis.
        tlist = [1, 0] + list(range(2, np.ndim(stim) + 1))
        batches = _make_batches(events.shape[0], batch_size)
        while 1:
            if shuffle:
                np.random.shuffle(index_array)
            for batch_start, batch_end in batches:
                batch_ids = index_array[batch_start:batch_end]
                frame_numbers_b = frame_numbers[batch_ids]
                batch_ids_stim = [frame_numbers_i[np.maximum(0, batch_ids - d)] for d in delays]
                x_batch = _standardize_input_data(stim[batch_ids_stim, :].transpose(tlist), ['x_batch'])

                y_batch = _standardize_input_data(events[batch_ids, :], ['y_batch'])

                w_batch = weights[batch_ids]

                # Samples whose frame number is below the last delay get zero weight.
                w_batch[frame_numbers_b < delays[-1]] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
    def keras_generator(self, event_type='OASIS', delays=7, batch_size=400, shift=True):
        """Yield (x_batch, responses) chunks for every movie of every dataset.

        # Arguments
            event_type: key into `self.events` selecting the response data.
            delays: integer number of time-steps to include (0 .. delays-1).
            batch_size: number of consecutive samples per yielded chunk.
            shift: if True, each frame is pasted into a larger zero canvas at
                the offset given by its shift location (frames whose shift is
                NaN stay all-zero); if False, frames are used as they are.

        # Yields
            (x_batch, resp_out): the delayed stimulus passed through
            `_standardize_input_data`, and the matching slice of responses.

        # Raises
            ValueError: if `event_type` is not a key of `self.events`.

        Warped movies that are not yet in `self._movie_warps` are computed,
        pickled under /tmp and stored in that mapping.
        """
        from keras.engine.training import _standardize_input_data
        if event_type not in self.events:
            raise ValueError('Please specifiy one of the following for event_type: ' + str(self.events.keys()))

        movie_dict = self._movie_warps

        for (ds, msl, sl, cfn, dff, _ci) in zip(self.datasets, self._movie_sample_list, self.shift_locs, self.corrected_frame_numbers, self.events[event_type], self.cell_indicies):
            for (movie_name, sl2, cfn2, dff2) in zip(msl[0], sl, cfn, dff):

                if movie_name not in movie_dict:
                    tmp_movie = self._get_stimulus_template(ds, movie_name)
                    # Warp the first frame once to learn the output frame size.
                    tmp = self.warp_movie_to_screen(tmp_movie[0], movie_name)
                    tmp_warp = np.zeros((len(tmp_movie), tmp.shape[0], tmp.shape[1]), dtype='uint8')
                    for i in range(len(tmp_movie)):
                        tmp_warp[i] = self.warp_movie_to_screen(tmp_movie[i], movie_name)

                    with open('/tmp/' + movie_name + '_' + str(self.downsample) + '.pickle', 'wb') as handle:
                        pickle.dump(tmp_warp, handle, protocol=pickle.HIGHEST_PROTOCOL)
                    movie_dict[movie_name] = tmp_warp

                original_stim = movie_dict[movie_name]
                frame_numbers = cfn2
                shift_locations = sl2
                resp = dff2

                sh = original_stim.shape

                for cut in range(0, len(frame_numbers), batch_size):

                    sl3 = shift_locations[cut:cut+batch_size]
                    fn = frame_numbers[cut:cut+batch_size]
                    resp_out = resp[:, cut:cut+batch_size]
                    # make larger stim defined by maximum shifts with a little extra slack
                    shift_stim_shape = (len(sl3),
                                        sh[1] + 2*np.maximum(self.min_max_shift[1][0], -self.min_max_shift[0][0]) + 2,
                                        sh[2] + 2*np.maximum(self.min_max_shift[1][1], -self.min_max_shift[0][1]) + 2)

                    if shift:
                        out_stim = np.zeros(shift_stim_shape, dtype='float32')
                    else:
                        out_stim = np.zeros((len(sl3), original_stim.shape[1], original_stim.shape[2]), dtype='float32')

                    # Floor division keeps every offset integral; true
                    # division yields floats on Python 3, which are invalid
                    # slice bounds.
                    sl3 = sl3 + [shift_stim_shape[1]//2, shift_stim_shape[2]//2]
                    good_shift_locations = ~np.isnan(sl3[:, 0])

                    for i in range(len(sl3)):
                        if shift:
                            if good_shift_locations[i]:
                                row = np.int32(sl3[i, 0])
                                col = np.int32(sl3[i, 1])
                                out_stim[i, -sh[1]//2 + row:row + sh[1]//2,
                                            -sh[2]//2 + col:col + sh[2]//2] = original_stim[fn[i]]
                        else:
                            out_stim[i] = original_stim[fn[i]]

                    x = out_stim
                    batch_ids = np.arange(x.shape[0])

                    # Indexing with the list of delayed ids puts the delay
                    # axis first; this permutation swaps it with the sample axis.
                    tlist = [1, 0] + list(range(2, np.ndim(x) + 1))

                    batch_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
                    x_batch = _standardize_input_data(x[batch_ids, :].transpose(tlist), ['x_batch'])

                    yield (x_batch, resp_out)
Пример #16
0
    def fit(self,
            X,
            Y,
            X_U,
            batch_size=32,
            epochs=1,
            gp_n_iter=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            **kwargs):
        """Trains the model for a fixed number of epochs (iterations on a dataset).

        For argument details, refer to `keras.engine.training.Model.fit`.
        `X_U` holds the unlabeled inputs handed to the `UpdateSSDKL` callback,
        which is always placed first in the callback list.

        Notes:
            The following arguments are currently unsupported by models with GP
            output layers (they are accepted but not forwarded):
            - validation_split
            - class_weight
            - sample_weight

            `validation_data` is standardized and given to the `UpdateSSDKL`
            callback; it is not passed on to the parent `fit`.
        """
        # Validate user data
        standardize_opts = dict(sample_weight=None,
                                class_weight=None,
                                check_batch_axis=False,
                                batch_size=batch_size)
        X, Y, _ = self._standardize_user_data(X, Y, **standardize_opts)
        if validation_data is not None:
            X_val, Y_val, _ = self._standardize_user_data(*validation_data,
                                                          **standardize_opts)
            validation_data = (X_val, Y_val)

        X_U = _standardize_input_data(X_U,
                                      self._feed_input_names,
                                      self._feed_input_shapes,
                                      check_batch_axis=False,
                                      exception_prefix='input')

        # Setup GP updates
        update_gp = UpdateSSDKL(ins=(X, Y),
                                unlabeled_ins=X_U,
                                val_ins=validation_data,
                                batch_size=batch_size,
                                gp_n_iter=gp_n_iter,
                                verbose=verbose)
        callbacks = [update_gp] + (callbacks or [])

        parent = super(Model, self)
        return parent.fit(X,
                          Y,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=verbose,
                          callbacks=callbacks,
                          shuffle=shuffle,
                          initial_epoch=initial_epoch,
                          **kwargs)
Пример #17
0
def predict(config):
    """Run every PREDICT-phase input of ``config`` through the loaded models.

    For each input tag whose ``'phase'`` is ``'PREDICT'`` the function builds
    a batch generator, feeds every batch to both models returned by
    ``load_model(config)``, prints whether the two outputs agree
    (``np.allclose`` with rtol/atol 1e-5), accumulates the configured metrics
    on the zoo model's output and optionally writes the ranked scores to a
    file.

    # Arguments
    config: dict read as follows
        - ``config['inputs']``: per-tag input settings; the ``'share'`` entry
          is merged into every PREDICT tag. ``'vocab_size'`` and
          ``'embed_size'`` are required in ``'share'``; ``'embed_path'`` is
          optional (random uniform embeddings are used without it).
        - ``config['outputs']``: optional per-tag ``'save_format'``
          (``'TREC'`` or ``'TEXTNET'``) and ``'save_path'``.
        - ``config['metrics']``: metric names, optionally ``name@k``.
        - ``config['net_name']``: written into TREC output lines.

    # Returns
    None. Results are printed and, when configured, written to disk.

    # Side effects
    Stores the embedding matrix under ``config['inputs']['share']['embed']``.
    """
    ######## Read input config ########

    print(json.dumps(config, indent=2), end='\n')
    input_conf = config['inputs']
    share_input_conf = input_conf['share']

    # collect embedding
    if 'embed_path' in share_input_conf:
        embed_dict = read_embedding(filename=share_input_conf['embed_path'])
        # The last vocabulary index is used as the padding id and gets an
        # all-zero vector.
        _PAD_ = share_input_conf['vocab_size'] - 1
        embed_dict[_PAD_] = np.zeros((share_input_conf['embed_size'], ),
                                     dtype=np.float32)
        embed = np.float32(
            np.random.uniform(-0.02, 0.02, [
                share_input_conf['vocab_size'], share_input_conf['embed_size']
            ]))
        share_input_conf['embed'] = convert_embed_2_numpy(embed_dict,
                                                          embed=embed)
    else:
        embed = np.float32(
            np.random.uniform(-0.2, 0.2, [
                share_input_conf['vocab_size'], share_input_conf['embed_size']
            ]))
        share_input_conf['embed'] = embed
    print('[Embedding] Embedding Load Done.', end='\n')

    # list all input tags and construct tags config
    input_predict_conf = OrderedDict()
    for tag, tag_conf in input_conf.items():
        # Tags without a 'phase' entry (e.g. 'share') are not inputs.
        if tag_conf.get('phase') != 'PREDICT':
            continue
        input_predict_conf[tag] = {}
        input_predict_conf[tag].update(share_input_conf)
        input_predict_conf[tag].update(tag_conf)
    print('[Input] Process Input Tags. %s in PREDICT.' %
          (input_predict_conf.keys()),
          end='\n')

    # collect dataset identification
    dataset = {}
    for tag, tag_conf in input_conf.items():
        # `.get` keeps this consistent with the loop above: a tag that has no
        # 'phase' key is skipped instead of raising KeyError.
        if tag != 'share' and tag_conf.get('phase') != 'PREDICT':
            continue
        for corpus_key in ('text1_corpus', 'text2_corpus'):
            if corpus_key not in tag_conf:
                continue
            datapath = tag_conf[corpus_key]
            if datapath not in dataset:
                dataset[datapath], _ = read_data(datapath)
    print('[Dataset] %s Dataset Load Done.' % len(dataset), end='\n')

    # initial data generator
    predict_gen = OrderedDict()

    for tag, conf in input_predict_conf.items():
        print(conf, end='\n')
        conf['data1'] = dataset[conf['text1_corpus']]
        conf['data2'] = dataset[conf['text2_corpus']]
        generator = inputs.get(conf['input_type'])
        predict_gen[tag] = generator(config=conf)

    ######## Read output config ########
    output_conf = config['outputs']

    ######## Load Model ########
    # load_model receives the whole config; zmodel and kmodel are the two
    # models whose outputs are compared batch by batch below.
    zmodel, kmodel = load_model(config)

    eval_metrics = OrderedDict()
    for mobj in config['metrics']:
        mobj = mobj.lower()
        if '@' in mobj:
            # "name@k": the metric factory is called with the integer cutoff.
            mt_key, mt_val = mobj.split('@', 1)
            eval_metrics[mobj] = metrics.get(mt_key)(int(mt_val))
        else:
            eval_metrics[mobj] = metrics.get(mobj)

    for tag, generator in predict_gen.items():
        genfun = generator.get_batch_generator()
        print('[%s]\t[Predict] @ %s ' % (time.strftime(
            '%m-%d-%Y %H:%M:%S', time.localtime(time.time())), tag),
              end='')
        num_valid = 0
        # Reset the accumulators for every tag so that the averages printed
        # for one tag do not include sums from previously processed tags.
        res = dict.fromkeys(eval_metrics, 0.)
        res_scores = {}
        is_list_generator = issubclass(
            type(generator), inputs.list_generator.ListBasicGenerator)
        for input_data, y_true in genfun:
            ky_pred = kmodel.predict(input_data, batch_size=len(y_true))
            names = ['query', 'doc']
            shapes = [(None, 10), (None, 40)]
            list_input_data = _standardize_input_data(input_data,
                                                      names,
                                                      shapes,
                                                      check_batch_axis=False)
            y_pred = zmodel.forward(list_input_data)
            # Sanity check: report whether both models agree on this batch.
            equal = np.allclose(y_pred, ky_pred, rtol=1e-5, atol=1e-5)
            print(equal)

            if is_list_generator:
                # list_counts holds the boundaries of each list inside the
                # batch; consecutive entries delimit one list.
                list_counts = input_data['list_counts']
                flat_pred = np.squeeze(y_pred)
                for pre, suf in zip(list_counts[:-1], list_counts[1:]):
                    for k, eval_func in eval_metrics.items():
                        res[k] += eval_func(y_true=y_true[pre:suf],
                                            y_pred=y_pred[pre:suf])
                    for p, y, t in zip(input_data['ID'][pre:suf],
                                       flat_pred[pre:suf], y_true[pre:suf]):
                        res_scores.setdefault(p[0], {})[p[1]] = (y, t)

                num_valid += len(list_counts) - 1
            else:
                for k, eval_func in eval_metrics.items():
                    res[k] += eval_func(y_true=y_true, y_pred=y_pred)
                for p, y, t in zip(input_data['ID'], y_pred, y_true):
                    res_scores.setdefault(p[0], {})[p[1]] = (y[1], t[1])
                num_valid += 1
        generator.reset()

        if tag in output_conf:
            if output_conf[tag]['save_format'] == 'TREC':
                with open(output_conf[tag]['save_path'], 'w') as f:
                    for qid, dinfo in res_scores.items():
                        # Rank the second-level ids by descending score.
                        dinfo = sorted(dinfo.items(),
                                       key=lambda d: d[1][0],
                                       reverse=True)
                        for inum, (did, (score, gt)) in enumerate(dinfo):
                            f.write('%s\tQ0\t%s\t%d\t%f\t%s\t%s\n' %
                                    (qid, did, inum, score, config['net_name'],
                                     gt))
            elif output_conf[tag]['save_format'] == 'TEXTNET':
                with open(output_conf[tag]['save_path'], 'w') as f:
                    for qid, dinfo in res_scores.items():
                        dinfo = sorted(dinfo.items(),
                                       key=lambda d: d[1][0],
                                       reverse=True)
                        for inum, (did, (score, gt)) in enumerate(dinfo):
                            f.write('%s %s %s %s\n' % (gt, qid, did, score))

        # Avoid ZeroDivisionError when the generator produced no batches; the
        # accumulated sums are all zero in that case.
        denom = num_valid if num_valid else 1
        print('[Predict] results: ',
              '\t'.join(['%s=%f' % (k, v / denom)
                         for k, v in res.items()]),
              end='\n')
        sys.stdout.flush()
Пример #18
0
def predict(model,
            batch_size,
            num_outputs,
            save_path,
            evaluate=False,
            liver_only=False,
            save_predictions=False,
            initial_epoch=0,
            **kwargs):
    """Predict (and optionally evaluate) on every data set, volume by volume.

    The model, callbacks and data generators come from ``prepare_model``.
    Each volume yielded by a generator is processed in slices of
    ``batch_size``; predictions can be written to a zarr group and metrics
    are reported through the validation callbacks.

    # Arguments
    model: passed to ``prepare_model``; the returned model is used afterwards.
    batch_size: number of entries of a volume fed to the model at once.
    num_outputs: number of model outputs to keep as predictions (1 or 2).
    save_path: directory in which ``predictions.zarr`` is created when
        ``save_predictions`` is set.
    evaluate: when True, the backend function also returns the total loss and
        the metric tensors, which are sent to the callbacks and printed.
    liver_only: selects which dice callbacks are used.
    save_predictions: when True, write predictions and the segmentation of
        each volume to the zarr group.
    initial_epoch: accepted for interface compatibility; not used here.
    **kwargs: forwarded to ``prepare_model``.

    # Raises
    ValueError: if ``num_outputs`` is neither 1 nor 2.
    """
    import shutil

    # Fail before any existing prediction file is deleted or any model work
    # is done.
    if num_outputs not in (1, 2):
        raise ValueError("num_outputs must be 1 or 2")

    model, callbacks, gen = prepare_model(model=model,
                                          num_outputs=num_outputs,
                                          liver_only=liver_only,
                                          evaluate=evaluate,
                                          **kwargs)

    # Set up prediction file.
    if save_predictions:
        save_path = os.path.join(save_path, "predictions.zarr")
        # A zarr group opened from a path string is stored as a directory,
        # which os.remove cannot delete.
        if os.path.isdir(save_path):
            shutil.rmtree(save_path)
        elif os.path.exists(save_path):
            os.remove(save_path)

        # output_shape is a single tuple for single-output models and a list
        # of tuples otherwise; normalise to a list.
        output_shapes = model.output_shape
        if not isinstance(output_shapes, list):
            output_shapes = [output_shapes]

    # Initialize callbacks
    val_callback_list = [BaseLogger()]
    if not liver_only:
        val_callback_list.extend(
            [callbacks['dice_lesion'], callbacks['dice_lesion_inliver']])
    if len(model.outputs) == 2 or liver_only:
        val_callback_list.append(callbacks['dice_liver'])
    val_callbacks = CallbackList(val_callback_list)
    val_callbacks.set_params({
        'nb_epoch': 0,
        'nb_sample': 0,
        'verbose': False,
        'do_validation': True,
        'metrics': model.metrics_names
    })
    val_callbacks.on_train_begin()
    val_callbacks.on_epoch_begin(0)

    # Create the backend function.
    feeds_learning_phase = (model.uses_learning_phase
                            and not isinstance(K.learning_phase(), int))
    if evaluate:
        inputs = model.inputs + model.targets + model.sample_weights
        function_outputs = (model.outputs + [model.total_loss] +
                            model.metrics_tensors)
    else:
        # Copy so that appending the learning phase below does not modify
        # the model's own list of inputs.
        inputs = list(model.inputs)
        function_outputs = model.outputs
    if feeds_learning_phase:
        inputs += [K.learning_phase()]
    predict_function = K.function(inputs,
                                  function_outputs,
                                  updates=model.state_updates)

    # Defined up front so the final report works even if no batch was run.
    val_logs = OrderedDict()

    # Predict for all data.
    print(' > Predicting...')
    for key in gen:
        print(' - DATA: {}'.format(key))

        # Duplicate inputs and outputs (and add outputs) as necessary.
        flow = repeat_flow(gen[key].flow(), num_outputs=num_outputs)

        # Set up file.
        if save_predictions:
            zgroup = zarr.open_group(store=save_path, mode='a', path="/")
            zarr_kwargs = {
                'chunks': (1, 512, 512),
                'compressor': zarr.Blosc(cname='lz4', clevel=9, shuffle=1)
            }

        # Predict and write to file.
        batch_num = 0
        for vol_num, volume in enumerate(flow):
            print("Predicting on `{}` - {}/{}"
                  "".format(key, vol_num + 1, len(gen[key])))
            num_slices = len(volume[0])

            # Begin writing to file.
            if save_predictions:
                vol_idx = volume[-1]
                subgroup = zgroup.create_group(str(vol_idx))
                # Predictions of all kept outputs are concatenated on axis 1.
                num_channels = sum(output_shapes[i][1]
                                   for i in range(num_outputs))
                output_shape = \
                    (num_slices, num_channels) + output_shapes[0][2:]
                subgroup.empty("volume",
                               shape=output_shape,
                               dtype=np.float32,
                               **zarr_kwargs)
                segmentation = volume[1]
                if isinstance(segmentation, list):
                    segmentation = segmentation[0]
                subgroup.create_dataset("segmentation",
                                        shape=segmentation.shape,
                                        data=segmentation,
                                        dtype=np.int16,
                                        **zarr_kwargs)

            # Iterate through volume batch-wise.
            for idx0 in range(0, num_slices, batch_size):
                idx1 = idx0 + batch_size
                # Prepare data for joint evaluation and prediction.
                if evaluate:
                    # NOTE(review): volume[1] may be a list (see the
                    # segmentation handling above); slicing it here would
                    # then slice the list itself - confirm against
                    # repeat_flow.
                    batch = (volume[0][idx0:idx1], volume[1][idx0:idx1])
                    x, y, sample_weights = model._standardize_user_data(
                        batch[0], batch[1])
                    ins = x + y + sample_weights
                else:
                    batch = (volume[0][idx0:idx1], )
                    ins = _standardize_input_data(batch[0],
                                                  model._feed_input_names,
                                                  model._feed_input_shapes,
                                                  check_batch_axis=False,
                                                  exception_prefix='input')
                if feeds_learning_phase:
                    # Learning phase 0 is fed for prediction.
                    ins += [0.]

                # Jointly evaluate and predict.
                outputs = predict_function(ins)
                predictions = outputs[:num_outputs]
                if evaluate:
                    val_metrics = outputs[num_outputs:]

                # Write predictions (only when a zarr subgroup was created).
                if save_predictions:
                    predictions = np.concatenate(predictions, axis=1)
                    subgroup['volume'][idx0:idx1] = predictions

                # Update metrics
                if evaluate:
                    val_logs = OrderedDict(
                        zip(model.metrics_names, val_metrics))
                    val_logs.update({
                        'batch': batch_num,
                        'size': len(batch[0])
                    })
                    val_callbacks.on_batch_end(batch_num, val_logs)

                batch_num += 1

    if evaluate:
        # Update metrics
        val_callbacks.on_epoch_end(0, val_logs)

        # Output metrics
        for m in val_logs:
            if m not in ['batch', 'size']:
                print("{}: {}".format(m, val_logs[m]))