Example #1
import cudamat as cm

def grad(X, Y, act, params, grads, aux):

    H, bh = params
    _H, _bh = grads
    a, eh, loss = aux

    # forward pass
    a[0].assign(X)
    n_layers = len(eh)

    for i in range(n_layers):
        # a[i+1] = sigmoid( a[i]*H[i] + bh[i] )
        a[i].dot(H[i], target=a[i + 1])
        a[i + 1].add_row_vec(bh[i])

        if i < n_layers - 1:
            cm.sigmoid(a[i + 1])
        else:
            # last layer
            if act == 'logistic':
                cm.sigmoid(a[i + 1])
            elif act == 'softmax':
                a_t = a[i + 1].transpose()
                cm.softmax(a_t)
                a_t.transpose(target=a[i + 1])
                a_t.free_device_memory()
            else:
                # 'linear': leave the pre-activation unchanged
                pass

    # backward pass

    # compute error term of the last layer
    a[-1].subtract(Y, target=eh[-1])

    # accumulate parameter gradients and propagate the error backward, layer by layer
    for i in range(n_layers - 1, -1, -1):

        # compute derivatives
        _H[i].assign(0.0)
        _H[i].add_dot(a[i].T, eh[i])
        eh[i].sum(axis=0, target=_bh[i])

        # compute error term for the previous layer
        if i > 0:
            # eh[i-1] = sigmoid'(a[i]) .* ( eh[i]*H[i]' )
            eh[i].dot(H[i].T, target=eh[i - 1])
            eh[i - 1].apply_logistic_deriv(a[i])

    if act == 'logistic':
        cm.cross_entropy_bernoulli(Y, a[-1], target=loss)
    elif act == 'softmax':
        cm.cross_entropy(Y, a[-1], target=loss)
    elif act == 'linear':
        # squared error: eh[-1] still holds a[-1] - Y from the backward pass
        eh[-1].mult(eh[-1], target=loss)

    return loss.sum()
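For context, here is a minimal, hypothetical driver for grad. The layer sizes, random data, and buffer names are illustrative (not from the original source), and it assumes a cudamat build exposing the methods used above (add_dot, apply_logistic_deriv, cross_entropy_bernoulli):

import numpy as np

cm.cublas_init()

n_cases, sizes = 128, [784, 256, 10]  # input, hidden, output widths
H = [cm.CUDAMatrix(0.01 * np.random.randn(sizes[i], sizes[i + 1]))
     for i in range(len(sizes) - 1)]
bh = [cm.CUDAMatrix(np.zeros((1, s))) for s in sizes[1:]]
_H = [cm.empty(w.shape) for w in H]
_bh = [cm.empty(b.shape) for b in bh]
a = [cm.empty((n_cases, s)) for s in sizes]       # activations; a[0] holds X
eh = [cm.empty((n_cases, s)) for s in sizes[1:]]  # per-layer error terms
loss = cm.empty((n_cases, sizes[-1]))

X = cm.CUDAMatrix(np.random.rand(n_cases, sizes[0]))
Y = cm.CUDAMatrix((np.random.rand(n_cases, sizes[-1]) > 0.5).astype(np.float64))

total_loss = grad(X, Y, 'logistic', (H, bh), (_H, _bh), (a, eh, loss))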
Example #2
    def GetLoss(self, get_deriv=False):
        """Compute the loss, and optionally the derivative w.r.t. it.

        Targets should be in self.data, predictions in self.state.

        Args:
          get_deriv: If True, compute the derivative w.r.t. the loss function
            and put it in self.deriv.
        """
        perf = deepnet_pb2.Metrics()
        perf.MergeFrom(self.proto.performance_stats)
        perf.count = self.batchsize
        tiny = self.tiny
        if self.loss_function == deepnet_pb2.Layer.CROSS_ENTROPY:
            if self.activation == deepnet_pb2.Hyperparams.LOGISTIC:
                data = self.data
                state = self.state
                deriv = self.deriv
                temp3 = self.temp3
                unitcell = self.unitcell

                cm.cross_entropy(data, state, target=deriv, tiny=self.tiny)
                deriv.sum(axis=1, target=temp3)
                temp3.sum(axis=0, target=unitcell)
                cross_entropy = unitcell.euclid_norm()

                cm.correct_preds(data, state, target=deriv, cutoff=0.5)
                deriv.sum(axis=1, target=temp3)
                temp3.sum(axis=0, target=unitcell)
                correct_preds = unitcell.euclid_norm()

                if get_deriv:
                    self.state.subtract(self.data, target=self.deriv)

                perf.cross_entropy = cross_entropy
                perf.correct_preds = correct_preds
            elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
                temp2 = self.temp2
                temp = self.temp
                batchsize = self.batchsize
                dimensions = self.dimensions
                numlabels = self.numlabels
                state = self.state
                data = self.data
                unitcell = self.unitcell
                indices = self.indices

                # Optimized for space to handle a large number of labels in a softmax.
                # data holds label indices; rowshift turns them into flat column
                # indices into the reshaped state.
                data.reshape((1, batchsize * dimensions))
                data.add(self.rowshift, target=indices)
                state.reshape((numlabels, dimensions * batchsize))
                state.max(axis=0, target=temp2)  # highest probability in each softmax
                state.reshape((1, batchsize * numlabels * dimensions))
                state.select_columns(indices, temp)  # probability of the correct label
                # A prediction is correct iff the correct label attains the max.
                temp2.subtract(temp)
                temp2.sign(target=temp2)
                temp2.sum(axis=1, target=unitcell)
                correct_preds = batchsize - unitcell.euclid_norm()
                if get_deriv:
                    # Softmax + cross-entropy gradient: subtract 1 at the correct labels.
                    temp.subtract(1, target=temp2)
                    state.set_selected_columns(indices, temp2)
                    state.reshape((numlabels * dimensions, batchsize))
                    self.deriv.assign(self.state)
                state.reshape((numlabels * dimensions, batchsize))
                # Cross entropy: -sum(log p(correct label)), with tiny guarding log(0).
                temp.add(tiny)
                cm.log(temp)
                temp.sum(axis=1, target=unitcell)
                cross_entropy = unitcell.euclid_norm()
                perf.cross_entropy = cross_entropy
                perf.correct_preds = correct_preds
        elif self.loss_function == deepnet_pb2.Layer.SQUARED_LOSS:
            if self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX and self.hyperparams.normalize:
                self.data.sum(axis=0, target=self.temp)
                self.temp.add(self.tiny)
                self.data.div_by_row(self.temp, target=self.deriv)
                self.deriv.mult(self.proto.hyperparams.normalize_to)
                self.deriv.subtract(self.state)
            elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
                self.expansion_matrix.select_columns(
                    self.data, target=self.expanded_batch)
                self.state.subtract(self.expanded_batch, target=self.deriv)
            else:
                self.state.subtract(self.data, target=self.deriv)
            error = self.deriv.euclid_norm()**2
            perf.error = error
            if self.activation != deepnet_pb2.Hyperparams.SOFTMAX and \
               self.activation != deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
                self.ComputeDeriv()
        else:
            raise Exception('Unknown loss function.')
        return perf
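Both variants of GetLoss rely on the standard identity that, for logistic (and softmax) units under cross-entropy, the derivative of the loss with respect to the pre-activation is simply prediction minus target, which is why get_deriv reduces to state.subtract(data, target=deriv). A quick numerical check of that identity in plain numpy (illustrative only, independent of the deepnet API):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def ce(z, t):
    # Bernoulli cross-entropy of a logistic unit with pre-activation z.
    p = sigmoid(z)
    return -t * np.log(p) - (1 - t) * np.log(1 - p)

z, t, eps = 0.3, 1.0, 1e-6
numeric = (ce(z + eps, t) - ce(z - eps, t)) / (2 * eps)
analytic = sigmoid(z) - t  # "state minus data"
assert np.allclose(numeric, analytic, atol=1e-6)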
Example #3
    def GetLoss(self, get_deriv=False):
        """Compute the loss, and optionally the derivative w.r.t. it.

        Targets should be in self.data, predictions in self.state.

        Args:
          get_deriv: If True, compute the derivative w.r.t. the loss function
            and put it in self.deriv.
        """
        perf = deepnet_pb2.Metrics()
        perf.MergeFrom(self.proto.performance_stats)
        perf.count = self.batchsize
        tiny = self.tiny
        if self.loss_function == deepnet_pb2.Layer.CROSS_ENTROPY:
            if self.activation == deepnet_pb2.Hyperparams.LOGISTIC:
                data = self.data
                state = self.state
                deriv = self.deriv
                temp3 = self.dimsize
                unitcell = self.unitcell

                cm.cross_entropy(data, state, target=deriv, tiny=self.tiny)
                deriv.sum(axis=1, target=temp3)
                temp3.sum(axis=0, target=unitcell)
                cross_entropy = unitcell.euclid_norm()

                cm.correct_preds(data, state, target=deriv, cutoff=0.5)
                deriv.sum(axis=1, target=temp3)
                temp3.sum(axis=0, target=unitcell)
                correct_preds = unitcell.euclid_norm()

                if get_deriv:
                    self.state.subtract(self.data, target=self.deriv)

                perf.cross_entropy = cross_entropy
                perf.correct_preds = correct_preds
            elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
                temp2 = self.temp2
                temp = self.temp
                batchsize = self.batchsize
                dimensions = self.dimensions
                numlabels = self.numlabels
                state = self.state
                data = self.data
                unitcell = self.unitcell
                indices = self.indices

                # Optimized for space to handle large number of labels in a softmax.
                data.reshape((1, batchsize * dimensions))
                data.add(self.rowshift, target=indices)
                state.reshape((numlabels, dimensions * batchsize))
                state.max(axis=0, target=temp2)
                state.reshape((1, batchsize * numlabels * dimensions))
                state.select_columns(indices, temp)
                temp2.subtract(temp)
                temp2.sign(target=temp2)
                temp2.sum(axis=1, target=unitcell)
                correct_preds = batchsize - unitcell.euclid_norm()
                if get_deriv:
                    temp.subtract(1, target=temp2)
                    state.set_selected_columns(indices, temp2)
                    state.reshape((numlabels * dimensions, batchsize))
                    self.deriv.assign(self.state)
                state.reshape((numlabels * dimensions, batchsize))
                temp.add(tiny)
                cm.log(temp)
                temp.sum(axis=1, target=unitcell)
                cross_entropy = unitcell.euclid_norm()
                perf.cross_entropy = cross_entropy
                perf.correct_preds = correct_preds
        elif self.loss_function == deepnet_pb2.Layer.SQUARED_LOSS:
            if self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX and self.hyperparams.normalize:
                self.data.sum(axis=0, target=self.temp)
                self.temp.add(self.tiny)
                self.data.div_by_row(self.temp, target=self.deriv)
                self.deriv.mult(self.proto.hyperparams.normalize_to)
                self.deriv.subtract(self.state)
            elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
                self.expansion_matrix.select_columns(self.data, target=self.expanded_batch)
                self.state.subtract(self.expanded_batch, target=self.deriv)
            else:
                if "precision" in self.params:
                    self.data.mult_by_col(self.params["precision"], target=self.deriv)
                    self.deriv.subtract(self.state)
                else:
                    self.state.subtract(self.data, target=self.deriv)
            error = self.deriv.euclid_norm() ** 2
            perf.error = error
            if (
                self.activation != deepnet_pb2.Hyperparams.SOFTMAX
                and self.activation != deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX
            ):
                self.ComputeDeriv()
        else:
            raise Exception("Unknown loss function.")
        return perf
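The only substantive difference from the previous example is the extra squared-loss branch that weights the targets by a per-dimension "precision" vector; self.params["precision"] is specific to whatever fork this was taken from. Assuming deepnet's usual (dimensions, batchsize) layout for data and state, the branch computes roughly the following (plain-numpy sketch, sizes hypothetical):

import numpy as np

dims, batch = 4, 8                   # hypothetical sizes
data = np.random.rand(dims, batch)   # targets
state = np.random.rand(dims, batch)  # predictions
precision = np.random.rand(dims, 1)  # per-dimension weights

deriv = precision * data - state     # mult_by_col(...), then subtract(state)
error = np.sum(deriv ** 2)           # deriv.euclid_norm() ** 2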