Example #1
def initialize_parameters(input_size, hidden_sizes, output_size):
    """
    Initializes the learnable parameters for a neural network (basically the connection weights and biases).
    The parameters are designed to work well with ReLU.
    :param input_size: The size of the input layer.
    :param hidden_sizes: The hidden layer sizes as array.
    :param output_size: The size of the output layer.
    :return: The connection weights and biases for the neural networks.
    """
    wh = []
    bh = []
    for i in range(len(hidden_sizes)):
        # He initialization: scale each weight matrix by sqrt(2 / fan_in),
        # where fan_in is the input dimension of that layer.
        fan_in = input_size if i == 0 else hidden_sizes[i - 1]
        wh.append(
            np.random.randn(fan_in, hidden_sizes[i]) * sqrt(2.0 / fan_in))
        bh.append(np.zeros((1, hidden_sizes[i])))
    w_out = np.random.randn(hidden_sizes[-1], output_size) * sqrt(
        2.0 / hidden_sizes[-1])
    b_out = np.zeros((1, output_size))
    return wh, bh, w_out, b_out
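A minimal usage sketch of the function above (hypothetical layer sizes; it assumes numpy is imported as np and sqrt comes from Python's math module, which is what the code expects):

import numpy as np
from math import sqrt

# Hypothetical 784 -> [128, 64] -> 10 network.
wh, bh, w_out, b_out = initialize_parameters(784, [128, 64], 10)
print(wh[0].shape, wh[1].shape)   # (784, 128) (128, 64)
print(w_out.shape, b_out.shape)   # (64, 10) (1, 10)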
Example #2
def main(args):
    # Create model.
    model = TwoLayerNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    img = np.zeros((args.batch_size, 784))
    label = np.zeros((args.batch_size,))

    start = time.time()
    for l in range(num_loops):
        def loss_func(*params):
            f = model.forward(img, 'train')
            return model.loss(f, label)
        if args.only_forward:
            loss = loss_func()
            loss.asnumpy()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = minpy.core.grad_and_loss(
                loss_func, argnum=range(len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
            for g in grad_arrays:
                g.get_data(minpy.array_variants.ArrayType.MXNET).wait_to_read()
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / num_loops))
Example #3
def lstm_temporal(x, h0, Wx, Wh, b):
    """
    Forward pass for an LSTM over an entire sequence of data. We assume an input
    sequence composed of T vectors, each of dimension D. The LSTM uses a hidden
    size of H, and we work over a minibatch containing N sequences. After running
    the LSTM forward, we return the hidden states for all timesteps.

    Note that the initial hidden state is passed as input, while the initial
    cell state is set to zero. Also note that the cell state is not returned;
    it is an internal variable to the LSTM and is not accessed from outside.

    Inputs:
    - x: Input data of shape (N, T, D)
    - h0: Initial hidden state of shape (N, H)
    - Wx: Weights for input-to-hidden connections, of shape (D, 4H)
    - Wh: Weights for hidden-to-hidden connections, of shape (H, 4H)
    - b: Biases of shape (4H,)

    Returns:
    - h: Hidden states for all timesteps of all sequences, of shape (N, T, H)
    """
    N, T, D = x.shape
    _, H = h0.shape
    c = np.zeros([N, 0, H])
    h = np.zeros([N, 0, H])
    for t in xrange(T):
        h_step, c_step = lstm_step(
            x[:, t, :], h[:, t - 1, :] if t > 0 else h0,
            c[:, t - 1, :] if t > 0 else np.zeros((N, H)), Wx, Wh, b)
        h_step = h_step.reshape(N, 1, H)
        c_step = c_step.reshape(N, 1, H)
        h = np.append(h, h_step, axis=1)
        c = np.append(c, c_step, axis=1)
    return h
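The helper lstm_step is not shown in this listing. A standard single-step LSTM that matches the (D, 4H) and (H, 4H) weight layout could look like the sketch below; this is an assumption for illustration, not necessarily the project's actual implementation.

def lstm_step(x, prev_h, prev_c, Wx, Wh, b):
    # Gate pre-activations of shape (N, 4H), split into input/forget/output/gate.
    H = prev_h.shape[1]
    a = np.dot(x, Wx) + np.dot(prev_h, Wh) + b
    i = 1.0 / (1.0 + np.exp(-a[:, 0:H]))          # input gate
    f = 1.0 / (1.0 + np.exp(-a[:, H:2 * H]))      # forget gate
    o = 1.0 / (1.0 + np.exp(-a[:, 2 * H:3 * H]))  # output gate
    g = np.tanh(a[:, 3 * H:4 * H])                # candidate cell values
    next_c = f * prev_c + i * g
    next_h = o * np.tanh(next_c)
    return next_h, next_c

With such a step in scope, lstm_temporal(x, h0, Wx, Wh, b) on inputs of shape (N, T, D) returns hidden states of shape (N, T, H).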
Example #4
def main(args):
    # Create model.
    model = RNNNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    data = np.zeros(
        (args.batch_size, args.input_size))  # Data of only one time step.
    label = np.zeros((args.batch_size, ))

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()

        def loss_func(*params):
            f = model.forward(data, 'train')
            return model.loss(f, label)

        if args.only_forward:
            loss = loss_func()
            loss.wait_to_read()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(loss_func,
                                                    argnum=range(
                                                        len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
            for g in grad_arrays:
                g.wait_to_read()

    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
Example #5
    def __init__(self,
                 input_dim=3 * 32 * 32,
                 hidden_dim=100,
                 num_classes=10,
                 weight_scale=1e-3,
                 reg=0.0,
                 conv_mode='lazy',
                 dtype=py_np.float64):
        """
    Initialize a new network.

    Inputs:
    - input_dim: An integer giving the size of the input
    - hidden_dim: An integer giving the size of the hidden layer
    - num_classes: An integer giving the number of classes to classify
    - dropout: Scalar between 0 and 1 giving dropout strength.
    - weight_scale: Scalar giving the standard deviation for random
      initialization of the weights.
    - reg: Scalar giving L2 regularization strength.
    """
        super(TwoLayerNet, self).__init__(conv_mode)
        self.params = {}
        self.reg = reg

        self.params['W1'] = random.randn(input_dim, hidden_dim) * weight_scale
        self.params['b1'] = np.zeros((hidden_dim))
        self.params['W2'] = random.randn(hidden_dim,
                                         num_classes) * weight_scale
        self.params['b2'] = np.zeros((num_classes))
Example #6
    def __init__(self,
                 input_dim=3 * 32 * 32,
                 hidden_dim=100,
                 num_classes=10,
                 weight_scale=1e-3,
                 reg=0.0,
                 conv_mode='lazy',
                 dtype=py_np.float64):
        """
        Initialize a new network.

        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - dropout: Scalar between 0 and 1 giving dropout strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        super(TwoLayerNet, self).__init__(conv_mode)
        self.params = {}
        self.reg = reg

        self.params['W1'] = random.randn(input_dim, hidden_dim) * weight_scale
        self.params['b1'] = np.zeros((hidden_dim))
        self.params['W2'] = random.randn(hidden_dim, num_classes) * weight_scale
        self.params['b2'] = np.zeros((num_classes))
Example #7
def main(args):
    # Create model.
    model = RNNNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    data = np.zeros((args.batch_size, args.input_size)) # Data of only one time step.
    label = np.zeros((args.batch_size,), dtype=np.int)

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()
        def loss_func(*params):
            f = model.forward(data, 'train')
            return model.loss(f, label)
        if args.only_forward:
            loss = loss_func()
            loss.asnumpy()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(
                loss_func, argnum=range(len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
Example #8
 def getLaplacian(self, W):
     D = np.zeros((W.shape[0], W.shape[1]))
     L = np.zeros((W.shape[0], W.shape[1]))
     for i in range(W.shape[1]):
         D[i][i] = np.sum(W[:, i])
     L = D - W
     return [D, L]
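As a quick sanity check, the unnormalized graph Laplacian L = D - W of a symmetric affinity matrix has rows that sum to zero; a small sketch assuming np is NumPy:

W = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [1., 0., 0.]])
D = np.diag(W.sum(axis=0))
L = D - W
print(L.sum(axis=1))   # [0. 0. 0.]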
Example #9
def lstm_temporal(x, h0, Wx, Wh, b):
    """
    Forward pass for an LSTM over an entire sequence of data. We assume an input
    sequence composed of T vectors, each of dimension D. The LSTM uses a hidden
    size of H, and we work over a minibatch containing N sequences. After running
    the LSTM forward, we return the hidden states for all timesteps.

    Note that the initial hidden state is passed as input, while the initial
    cell state is set to zero. Also note that the cell state is not returned;
    it is an internal variable to the LSTM and is not accessed from outside.

    Inputs:
    - x: Input data of shape (N, T, D)
    - h0: Initial hidden state of shape (N, H)
    - Wx: Weights for input-to-hidden connections, of shape (D, 4H)
    - Wh: Weights for hidden-to-hidden connections, of shape (H, 4H)
    - b: Biases of shape (4H,)

    Returns:
    - h: Hidden states for all timesteps of all sequences, of shape (N, T, H)
    """
    N, T, D = x.shape
    _, H = h0.shape
    c = np.zeros([N, 0, H])
    h = np.zeros([N, 0, H])
    for t in xrange(T):
        h_step, c_step = lstm_step(
          x[:, t, :], h[:, t-1, :] if t > 0 else h0, c[:, t-1, :] if t > 0 else np.zeros((N, H)), Wx, Wh, b)
        h_step = h_step.reshape(N, 1, H)
        c_step = c_step.reshape(N, 1, H)
        h = np.append(h, h_step, axis=1)
        c = np.append(c, c_step, axis=1)
    return h
Example #10
def main(args):
    # Create model.
    model = TwoLayerNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    img = np.zeros((args.batch_size, 784))
    label = np.zeros((args.batch_size, ), dtype=np.int)

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()

        def loss_func(*params):
            f = model.forward(img, 'train')
            return model.loss(f, label)

        if args.only_forward:
            loss = loss_func()
            loss.asnumpy()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(loss_func,
                                                    argnum=range(
                                                        len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
Example #11
 def gaussian_cluster_generator(num_samples=10000, num_features=500, num_classes=5):
     mu = np.random.rand(num_classes, num_features)
     sigma = np.ones((num_classes, num_features)) * 0.1
     num_cls_samples = int(num_samples / num_classes)
     x = np.zeros((num_samples, num_features))
     y = np.zeros((num_samples, num_classes))
     for i in range(num_classes):
         cls_samples = np.random.normal(mu[i,:], sigma[i,:], (num_cls_samples, num_features))
         x[i*num_cls_samples:(i+1)*num_cls_samples] = cls_samples
         y[i*num_cls_samples:(i+1)*num_cls_samples,i] = 1
     return x, y
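A quick usage sketch with hypothetical small sizes, showing the returned shapes and the one-hot class labels:

x, y = gaussian_cluster_generator(num_samples=100, num_features=5, num_classes=5)
print(x.shape, y.shape)   # (100, 5) (100, 5)
print(y.sum(axis=0))      # 20 samples per class: [20. 20. 20. 20. 20.]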
Example #12
    def getP(self, Xmask, F, X_big):
        size = F.shape[0]
        Q = np.zeros((size, size))
        P = np.zeros((size, size))
        for i in range(size):
            for j in range(size):
                Q[i, j] = self.norm(F[i] - F[j])

        P = (2 * Xmask * X_big - self.rho * Q) / (2 * Xmask + np.full(
            (size, size), self.alpha))
        return P
Example #13
 def forward(self, X, mode):
     seq_len = X.shape[1]
     batch_size = X.shape[0]
     hidden_size = self.params['Wh'].shape[0]
     h = np.zeros((batch_size, hidden_size))
     c = np.zeros((batch_size, hidden_size))
     for t in xrange(seq_len):
         h, c = layers.lstm_step(X[:, t, :], h, c, self.params['Wx'],
                                 self.params['Wh'], self.params['b'])
     y = layers.affine(h, self.params['Wa'], self.params['ba'])
     return y
Example #14
File: lstm.py, Project: lryta/minpy
 def forward(self, X, mode):
     seq_len = X.shape[1]
     batch_size = X.shape[0]
     hidden_size = self.params['Wh'].shape[0]
     h = np.zeros((batch_size, hidden_size))
     c = np.zeros((batch_size, hidden_size))
     for t in xrange(seq_len):
         h, c = layers.lstm_step(X[:, t, :], h, c, self.params['Wx'], self.params['Wh'],
                                 self.params['b'])
     y = layers.affine(h, self.params['Wa'], self.params['ba'])
     return y
Example #15
def statistics(data, length):
    xax = np.zeros(length)
    yax = np.zeros(length)
    fixed_2daxis_slice(data, xax, length, axis=1)
    fixed_2daxis_slice(data, yax, length, axis=2)
    print(xax)
    print(yax)
    xax = xax.asnumpy()
    yax = yax.asnumpy()
    plt.plot(xax, yax)
    
    plt.grid()
    plt.show()
Example #16
  def __init__(self, word_to_idx, input_dim=512, wordvec_dim=128,
               hidden_dim=128, cell_type='rnn', dtype=None):
    """
    Construct a new CaptioningRNN instance.

    Inputs:
    - word_to_idx: A dictionary giving the vocabulary. It contains V entries,
      and maps each string to a unique integer in the range [0, V).
    - input_dim: Dimension D of input image feature vectors.
    - wordvec_dim: Dimension W of word vectors.
    - hidden_dim: Dimension H for the hidden state of the RNN.
    - cell_type: What type of RNN to use; either 'rnn' or 'lstm'.
    - dtype: numpy datatype to use; use float32 for training and float64 for
      numeric gradient checking.
    """
    if cell_type not in {'rnn', 'lstm'}:
      raise ValueError('Invalid cell_type "%s"' % cell_type)
    
    self.cell_type = cell_type
    self.dtype = dtype
    self.word_to_idx = word_to_idx
    self.idx_to_word = {i: w for w, i in word_to_idx.iteritems()}
    self.params = {}
    
    vocab_size = len(word_to_idx)

    self._null = word_to_idx['<NULL>']
    self._start = word_to_idx.get('<START>', None)
    self._end = word_to_idx.get('<END>', None)
    
    # Initialize word vectors
    self.params['W_embed'] = np.random.randn(vocab_size, wordvec_dim)
    self.params['W_embed'] /= 100
    
    # Initialize CNN -> hidden state projection parameters
    self.params['W_proj'] = np.random.randn(input_dim, hidden_dim)
    self.params['W_proj'] /= np.sqrt(input_dim)
    self.params['b_proj'] = np.zeros(hidden_dim)

    # Initialize parameters for the RNN
    dim_mul = {'lstm': 4, 'rnn': 1}[cell_type]
    self.params['Wx'] = np.random.randn(wordvec_dim, dim_mul * hidden_dim)
    self.params['Wx'] /= np.sqrt(wordvec_dim)
    self.params['Wh'] = np.random.randn(hidden_dim, dim_mul * hidden_dim)
    self.params['Wh'] /= np.sqrt(hidden_dim)
    self.params['b'] = np.zeros(dim_mul * hidden_dim)
    
    # Initialize output to vocab weights
    self.params['W_vocab'] = np.random.randn(hidden_dim, vocab_size)
    self.params['W_vocab'] /= np.sqrt(hidden_dim)
    self.params['b_vocab'] = np.zeros(vocab_size)
Example #17
def lr_hmc(y, X, epsilon, L, alpha, n_iter):
    def U(beta):
        return mp.sum(mp.log(1 + mp.exp(mp.dot(X, beta))))-mp.dot(y.T,(mp.dot(X,beta)))+(0.5/alpha)*mp.sum(beta**2)

    def dU(beta):
        return mp.dot(X.T, (mp.exp(mp.dot(X,beta))/(1+mp.exp(mp.dot(X,beta))) - y)) + beta/alpha

    D = X.shape[1]
    q = mp.zeros((D, 1), dtype=mp.float32)
    out = mp.zeros((n_iter, D), dtype=mp.float32)
    for i in range(n_iter):
        q = hmc(U, dU, epsilon, L, q)
        out[i,:] = mp.ravel(q)
    return out
Example #18
 def gaussian_cluster_generator(num_samples=10000,
                                num_features=500,
                                num_classes=5):
     mu = np.random.rand(num_classes, num_features)
     sigma = np.ones((num_classes, num_features)) * 0.1
     num_cls_samples = int(num_samples / num_classes)
     x = np.zeros((num_samples, num_features))
     y = np.zeros((num_samples, num_classes))
     for i in range(num_classes):
         cls_samples = np.random.normal(mu[i, :], sigma[i, :],
                                        (num_cls_samples, num_features))
         x[i * num_cls_samples:(i + 1) * num_cls_samples] = cls_samples
         y[i * num_cls_samples:(i + 1) * num_cls_samples, i] = 1
     return x, y
Example #19
def test_lr_grad():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5

    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1

    gradient_checker.quick_grad_check(training_loss, inputs)
Example #20
 def getMask(self, M):
     mask = np.zeros((M.shape[0], M.shape[1]))
     (index_i, index_j) = np.nonzero(M)
     for i in range(index_i.shape[0]):
         mask[index_i[i]][index_j[i]] = 1
     # print mask
     return mask
Example #21
def softmax_cross_entropy(prob, label):
    """
    Computes the cross entropy for softmax activation.

    Inputs:
    - prob: Probability, of shape (N, C) where prob[i, j] is the probability for the jth class
      for the ith input.
    - label: Either of the following:
      - One hot encoding of labels, of shape (N, C)
      - Label index of shape (N, ), each y[i] is the label of i^th example
        (0 <= y[i] < C)

    Returns a Value:
    - cross_entropy
    """

    N = prob.shape[0]
    C = prob.shape[1]
    if len(label.shape) == 1:
        #convert it to one hot encoding
        onehot_label = np.zeros([N, C])
        np.onehot_encode(label, onehot_label)
    else:
        onehot_label = label
    return -np.sum(np.log(prob) * onehot_label) / N
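For reference, the same cross entropy computed with plain NumPy indexing instead of the MinPy-specific np.onehot_encode op (hypothetical values):

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
labels = np.array([0, 1])
onehot = np.zeros_like(probs)
onehot[np.arange(probs.shape[0]), labels] = 1
ce = -np.sum(np.log(probs) * onehot) / probs.shape[0]
print(ce)   # -(log 0.7 + log 0.8) / 2, roughly 0.290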
Example #22
def softmax_loss(x, label):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - label: Either of the following:
      - One hot encoding of labels, of shape (N, C)
      - Label index of shape (N, ), each y[i] is the label of i^th example
        (0 <= y[i] < C)

    Returns:
    - loss: Scalar giving the loss
    """
    N = x.shape[0]
    C = x.shape[1]
    if len(label.shape) == 1:
        #convert it to one hot encoding
        onehot_label = np.zeros([N, C])
        np.onehot_encode(label, onehot_label)
    else:
        onehot_label = label
    prob = np.softmax_output(x, onehot_label)
    return softmax_cross_entropy(prob, onehot_label)
Example #23
def softmax_cross_entropy(prob, label):
    """
    Computes the cross entropy for softmax activation.

    Inputs:
    - prob: Probability, of shape (N, C) where prob[i, j] is the probability for the jth class
      for the ith input.
    - label: Either of the following:
      - One hot encoding of labels, of shape (N, C)
      - Label index of shape (N, ), each y[i] is the label of i^th example
        (0 <= y[i] < C)

    Returns a Value:
    - cross_entropy
    """

    N = prob.shape[0]
    C = prob.shape[1]
    if len(label.shape) == 1:
        #convert it to one hot encoding
        onehot_label = np.zeros([N, C])
        np.onehot_encode(label, onehot_label)
    else:
        onehot_label = label
    return -np.sum(np.log(prob) * onehot_label) / N
Example #24
def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Either of the following:
      - One hot encoding of labels, of shape (N, C)
      - Label index of shape (N, ), each y[i] is the label of i^th example
        (0 <= y[i] < C)

    Returns:
    - loss: Scalar giving the loss
    """
    N = x.shape[0]
    C = x.shape[1]
    if len(y.shape) == 1:
        #convert it to one hot encoding
        onehot_y = np.zeros([N, C])
        np.onehot_encode(y, onehot_y)
    else:
        onehot_y = y
    probs = x - np.max(x, axis=1, keepdims=True)
    loss = -np.sum(probs * onehot_y) / N
    loss += np.sum(np.log(np.sum(np.exp(probs), axis=1, keepdims=True))) / N
    return loss
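The shifted-logit formulation above is just the mean negative log-softmax of the true class; a plain-NumPy sketch of the equivalence with hypothetical scores:

x = np.array([[2.0, 1.0, 0.1],
              [0.5, 2.5, 0.3]])
y = np.array([0, 1])
shifted = x - np.max(x, axis=1, keepdims=True)
log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
loss = -np.mean(log_probs[np.arange(x.shape[0]), y])
print(loss)   # matches softmax_loss(x, y)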
Example #25
def rnn_temporal(x, h0, Wx, Wh, b):
    """
    Run a vanilla RNN forward on an entire sequence of data. We assume an input
    sequence composed of T vectors, each of dimension D. The RNN uses a hidden
    size of H, and we work over a minibatch containing N sequences. After running
    the RNN forward, we return the hidden states for all timesteps.

    Inputs:
    - x: Input data for the entire timeseries, of shape (N, T, D).
    - h0: Initial hidden state, of shape (N, H)
    - Wx: Weight matrix for input-to-hidden connections, of shape (D, H)
    - Wh: Weight matrix for hidden-to-hidden connections, of shape (H, H)
    - b: Biases of shape (H,)

    Returns:
    - h: Hidden states for the entire timeseries, of shape (N, T, H).
    """
    N, T, _ = x.shape
    H = h0.shape[1]
    h = np.zeros([N, 0, H])
    for t in range(T):
        h_step = rnn_step(x[:, t, :], h0 if t == 0 else h[:, t - 1, :], Wx, Wh,
                          b).reshape(N, 1, H)
        h = np.append(h, h_step, axis=1)
    return h
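rnn_step is not shown in this listing; a standard vanilla-RNN step consistent with the (D, H) and (H, H) weight shapes would be the following sketch (an assumption, not necessarily the project's code):

def rnn_step(x, prev_h, Wx, Wh, b):
    # One tanh recurrence: h_t = tanh(x . Wx + h_{t-1} . Wh + b).
    return np.tanh(np.dot(x, Wx) + np.dot(prev_h, Wh) + b)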
Example #26
def rnn_temporal(x, h0, Wx, Wh, b):
    """
    Run a vanilla RNN forward on an entire sequence of data. We assume an input
    sequence composed of T vectors, each of dimension D. The RNN uses a hidden
    size of H, and we work over a minibatch containing N sequences. After running
    the RNN forward, we return the hidden states for all timesteps.

    Inputs:
    - x: Input data for the entire timeseries, of shape (N, T, D).
    - h0: Initial hidden state, of shape (N, H)
    - Wx: Weight matrix for input-to-hidden connections, of shape (D, H)
    - Wh: Weight matrix for hidden-to-hidden connections, of shape (H, H)
    - b: Biases of shape (H,)

    Returns:
    - h: Hidden states for the entire timeseries, of shape (N, T, H).
    """
    N, T, D = x.shape
    H = h0.shape[1]
    h = np.zeros([N, 0, H])
    for t in xrange(T):
        h_step = rnn_step(x[:, t, :], h0 if t == 0 else h[:, t - 1, :], Wx, Wh,
                          b).reshape(N, 1, H)
        h = np.append(h, h_step, axis=1)
    return h
Example #27
def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Either of the following:
      - One hot encoding of labels, of shape (N, C)
      - Label index of shape (N, ), each y[i] is the label of i^th example
        (0 <= y[i] < C)

    Returns:
    - loss: Scalar giving the loss
    """
    N = x.shape[0]
    C = x.shape[1]
    if len(y.shape) == 1:
        #convert it to one hot encoding
        onehot_y = np.zeros([N, C])
        np.onehot_encode(y, onehot_y)
    else:
        onehot_y = y
    probs = x - np.max(x, axis=1, keepdims=True)
    loss = -np.sum(probs * onehot_y) / N
    loss += np.sum(np.log(np.sum(np.exp(probs), axis=1, keepdims=True))) / N
    return loss
Example #28
 def forward(self, X, mode):
     h = np.zeros(self.hshape)  # init hidden state
     for t in range(self.num_unroll_steps):
         h = layers.rnn_step(X, h, self.params['Wx'], self.params['Wh'],
                             self.params['b'])
     y = layers.affine(h, self.params['Wa'], self.params['ba'])
     return y
Example #29
    def forward(self, X, mode):
        N, sequence_length, D = X.shape
        WX = self.params['WX']
        Wh = self.params['Wh']
        bias_h = self.params['bias_h']
        WY = self.params['WY']
        bias_Y = self.params['bias_Y']
        WY0 = self.params['WY0']
        bias_Y0 = self.params['bias_Y0']

        h = np.zeros((N, self._n_hidden))
        self.previous_h = [h]
        for t in xrange(sequence_length):
            X_t = X[:, t, :]
            h0 = self._update_h(X_t, h, WX, Wh, bias_h)
            projected_h = sum(
                batch_scalar_product(h, h0) * h
                for t, h in enumerate(self.previous_h))
            h = np.dot(X_t, WX) + np.dot(h, Wh) + projected_h
            h = self._nonlinear(h)
            self.previous_h.append(h)

        Y0 = layers.relu(layers.affine(h, WY0, bias_Y0))
        Y = layers.affine(Y0, WY, bias_Y)
        return Y
Example #30
 def forward(self, X, mode):
     h = np.zeros(self.hshape)  # init hidden state
     for t in xrange(self.num_unroll_steps):
         h = layers.rnn_step(X, h, self.params['Wx'],
                             self.params['Wh'], self.params['b'])
     y = layers.affine(h, self.params['Wa'], self.params['ba'])
     return y
Example #31
def test_lr_grad():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5

    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1

    gradient_checker.quick_grad_check(training_loss, inputs)
Example #32
def test_context():
    set_context(gpu(1)) # set the global context as gpu(1)
    
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0
    
    with gpu(0):
        xshape = (256, 500)
        wshape = (500, 250)
        tshape = (256, 250)
        inputs = random.rand(*xshape) - 0.5
        targets = np.zeros(tshape)
        truth = random.randint(0, 250, 256)
        targets[np.arange(256), truth] = 1
        weights = random.rand(*wshape) - 0.5
    
        training_gradient_fun = grad(training_loss)
    
        for i in range(20):
            print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
            gr = training_gradient_fun(weights, inputs)
            weights -= gr * 0.01
        print("\nff and bp on {0}".format(weights.context))
    
    print("\nexecute on cpu")
    with cpu():
        x_cpu = random.rand(32, 64) - 0.5
        y_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(x_cpu, y_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))
    
    print("\nexecute on gpu(0)")
    with gpu(0):
        x_gpu0 = random.rand(32, 64) - 0.5
        y_gpu0 = random.rand(64, 32) - 0.5
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
        z_gpu0.asnumpy()
        print('z_gpu0.context = {0}'.format(z_gpu0.context))
    
    print("\n[use global context] execute on gpu(1)")
    x_gpu1 = random.rand(32, 64) - 0.5
    y_gpu1 = random.rand(64, 32) - 0.5
    z_gpu1 = np.dot(x_gpu1, y_gpu1)
    z_gpu1.asnumpy()
    print('z_gpu1.context = {0}'.format(z_gpu1.context))
Example #33
def blob_normalization(X,
                       settings,
                       gamma,
                       beta,
                       mode='train',
                       epsilon=1e-5,
                       momentum=0.9,
                       running_mean=None,
                       running_variance=None):
    N, D = map(int, X.shape)
    size = N * D

    if running_mean is None:
        running_mean = np.zeros(1)
    if running_variance is None:
        running_variance = np.zeros(1)

    if mode == 'train':
        if 'shared_mean' in settings:
            mean = np.sum(X) / size
        else:
            mean = np.sum(X, axis=0) / N
            mean = np.reshape(mean, (1, D))

        centered_X = X - mean

        if 'shared_deviation' in settings:
            variance = np.sum(centered_X**2) / size
        else:
            variance = np.sum(centered_X**2, axis=0) / N
            variance = np.reshape(variance, (1, D))

        deviation = variance**0.5
        rescaled_X = centered_X / deviation

        out = gamma * rescaled_X + beta

        running_mean = momentum * running_mean + (1.0 - momentum) * mean
        running_variance = momentum * running_variance + (1.0 -
                                                          momentum) * variance

    elif mode == 'test':
        X_hat = (X - running_mean) / np.sqrt(running_variance + epsilon)
        out = gamma * X_hat + beta

    return out, running_mean, running_variance
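A minimal train-mode usage sketch with made-up inputs; settings is treated here as a collection of flag strings, which is what the membership tests above expect:

X = np.random.randn(64, 10) * 3.0 + 1.0
gamma, beta = np.ones((1, 10)), np.zeros((1, 10))
out, r_mean, r_var = blob_normalization(X, settings=set(), gamma=gamma, beta=beta)
print(out.mean(axis=0))   # per-feature means close to 0
print(out.std(axis=0))    # per-feature standard deviations close to 1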
Example #34
 def forward(self, X, mode):
     out = self.conv(X=X, **self.params)
     out = layers.affine(out, self.params['w1'], self.params['b1'])
     out = layers.relu(out)
     out = layers.affine(out, self.params['w2'], self.params['b2'])
     # This verifies whether symbols can be reused.
     trash = self.conv(X=np.zeros(X.shape), **self.params)
     return out
Example #35
 def forward(self, X, mode):
     out = self.conv(X=X, **self.params)
     out = layers.affine(out, self.params['w1'], self.params['b1'])
     out = layers.relu(out)
     out = layers.affine(out, self.params['w2'], self.params['b2'])
     # This verifies whether symbols can be reused.
     trash = self.conv(X=np.zeros(X.shape), **self.params)
     return out
Example #36
    def forward(self, X, mode):
        N, sequence_length, D = X.shape
        h = np.zeros((N, self._n_hidden))
        c = np.zeros((N, self._n_hidden))

        WX = self.params['WX']
        Wh = self.params['Wh']
        bias = self.params['bias']
        WY = self.params['WY']
        bias_Y = self.params['bias_Y']

        for t in range(sequence_length):
            X_t = X[:, t, :]
            h, c = layers.lstm_step(X_t, h, c, WX, Wh, bias)

        Y = layers.affine(h, WY, bias_Y)
        return Y
Example #37
def softmax_probability(p, channel):
    N, C = p.shape
    p -= np.max(p, axis=1).reshape((N, 1))
    code = np.zeros((N, C))
    np.onehot_encode(channel, code)
    p = np.exp(p)
    selected_p = p * code
    total_p = np.sum(p, axis=1).reshape((N, 1))
    return np.sum(selected_p / total_p, axis=1)
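softmax_probability returns, for each row of p, the softmax probability of the class index given in channel; a plain-NumPy sketch of the same computation without the MinPy np.onehot_encode op:

p = np.array([[2.0, 1.0, 0.1],
              [0.5, 2.5, 0.3]])
channel = np.array([0, 1])
shifted = p - np.max(p, axis=1, keepdims=True)
softmax = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
print(softmax[np.arange(p.shape[0]), channel])   # selected class probabilities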
Example #38
    def set_param(self):
        self.params = {}

        c_cnt, height, width = self.input_dim
        f_cnt = self.num_filters
        f_h, f_w = self.filter_size, self.filter_size

        self.params['conv1_weight'] = random.randn(f_cnt, c_cnt, f_h,
                                                   f_w) * self.weight_scale
        self.params['conv1_bias'] = np.zeros(f_cnt)

        # TODO(Haoran): all of these dimension calculations should be
        # replaced by querying symbol.arg_list
        conv_stride = 1
        conv_pad = (f_h - 1) / 2

        Hc, Wc = 1 + (height + 2 * conv_pad - f_h) / conv_stride, 1 + (
            width + 2 * conv_pad - f_w) / conv_stride

        pool_height, pool_width = 2, 2
        pool_stride = 2

        Hp, Wp = (Hc - pool_height) / pool_stride + 1, (
            Wc - pool_width) / pool_stride + 1

        # weight has to be transposed to fit mxnet's symbol
        self.params['fc1_weight'] = np.transpose(
            random.randn(5408, self.hidden_dim) * self.weight_scale)
        self.params['fc1_bias'] = np.zeros((self.hidden_dim))

        # weight has to be transposed to fit mxnet's symbol
        self.params['fc2_weight'] = np.transpose(
            random.randn(self.hidden_dim, self.num_classes) *
            self.weight_scale)
        self.params['fc2_bias'] = np.zeros((self.num_classes))

        #TODO(Haoran): move following into parent structured model class
        self.param_keys = self.params.keys()

        # Build key's index in loss func's arglist
        self.key_args_index = {}
        for i, key in enumerate(self.param_keys):
            # data, targets would be the first two elements in arglist
            self.key_args_index[key] = self.data_target_cnt + i
Example #39
def softmax_cross_entropy(prob, label):
    N = prob.shape[0]
    C = prob.shape[1]
    if len(label.shape) == 1:
        #convert it to one hot encoding
        onehot_label = np.zeros([N, C])
        np.onehot_encode(label, onehot_label)
    else:
        onehot_label = label
    return -np.sum(np.log(prob) * onehot_label) / N
Example #40
    def set_param(self):
        self.params = {}

        c_cnt, height, width = self.input_dim
        f_cnt = self.num_filters
        f_h, f_w = self.filter_size, self.filter_size

        self.params['conv1_weight'] = random.randn(f_cnt, c_cnt, f_h,
                                                   f_w) * self.weight_scale
        self.params['conv1_bias'] = np.zeros(f_cnt)

        # TODO(Haoran): all of these dimension calculations should be
        # replaced by querying symbol.arg_list
        conv_stride = 1
        conv_pad = (f_h - 1) / 2

        Hc, Wc = 1 + (height + 2 * conv_pad - f_h) / conv_stride, 1 + (
            width + 2 * conv_pad - f_w) / conv_stride

        pool_height, pool_width = 2, 2
        pool_stride = 2

        Hp, Wp = (Hc - pool_height) / pool_stride + 1, (Wc - pool_width
                                                       ) / pool_stride + 1

        # weight has to be transposed to fit mxnet's symbol
        self.params['fc1_weight'] = np.transpose(random.randn(
            5408, self.hidden_dim) * self.weight_scale)
        self.params['fc1_bias'] = np.zeros((self.hidden_dim))

        # weight has to be transposed to fit mxnet's symbol
        self.params['fc2_weight'] = np.transpose(random.randn(
            self.hidden_dim, self.num_classes) * self.weight_scale)
        self.params['fc2_bias'] = np.zeros((self.num_classes))

        #TODO(Haoran): move following into parent structured model class
        self.param_keys = self.params.keys()

        # Build key's index in loss func's arglist
        self.key_args_index = {}
        for i, key in enumerate(self.param_keys):
            # data, targets would be the first two elements in arglist
            self.key_args_index[key] = self.data_target_cnt + i
Example #41
def softmax_cross_entropy(prob, label):
    N = prob.shape[0]
    C = prob.shape[1]
    if len(label.shape) == 1:
        #convert it to one hot encoding
        onehot_label = np.zeros([N, C])
        np.onehot_encode(label, onehot_label)
    else:
        onehot_label = label
    return -np.sum(np.log(prob) * onehot_label) / N
Example #42
def build_dataset(filename, sequence_length, alphabet_size, max_lines=-1):
    """Loads a text file, and turns each line into an encoded sequence."""
    with open(filename) as f:
        content = f.readlines()
    content = content[:max_lines]
    content = [line for line in content if len(line) > 2]  # Remove blank lines
    seqs = np.zeros((sequence_length, len(content), alphabet_size))
    for ix, line in enumerate(content):
        padded_line = (line + " " * sequence_length)[:sequence_length]
        seqs[:, ix, :] = string_to_one_hot(padded_line, alphabet_size)
    return seqs
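string_to_one_hot is assumed to be available in scope; a hypothetical sketch of such a helper (one-hot over character codes, in the spirit of the classic autograd RNN example) might be:

def string_to_one_hot(string, alphabet_size):
    # One row per character; column k is 1 when ord(char) % alphabet_size == k.
    codes = np.array([ord(c) for c in string]) % alphabet_size
    return np.array(codes[:, None] == np.arange(alphabet_size)[None, :], dtype=int)

This returns an array of shape (len(string), alphabet_size), which is what the seqs[:, ix, :] assignment above requires.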
Example #43
def main():
    X = np.random.randn(10, 3, 256, 256)
    w = np.random.randn(8, 3, 3, 3)
    b = np.zeros((8, ))
    params = {'pad': 1, 'stride': 2}

    start = time.time()

    conv_forward_naive(X, w, b, params)

    print(time.time() - start)
Example #44
def build_dataset(filename, sequence_length, alphabet_size, max_lines=-1):
    """Loads a text file, and turns each line into an encoded sequence."""
    with open(filename) as f:
        content = f.readlines()
    content = content[:max_lines]
    content = [line for line in content if len(line) > 2]   # Remove blank lines
    seqs = np.zeros((sequence_length, len(content), alphabet_size))
    for ix, line in enumerate(content):
        padded_line = (line + " " * sequence_length)[:sequence_length]
        seqs[:, ix, :] = string_to_one_hot(padded_line, alphabet_size)
    return seqs
Example #45
 def forward(self, X, mode):
     batch_size = X.shape[0]
     seq_len = X.shape[1]
     X_emb = self.params['W_Emb'][X]
     hm1 = np.zeros((batch_size, self.HID_DIM))
     hs = []
     for t in xrange(seq_len):
         hm1 = self.one_step(X_emb[:,t,:], hm1)
         hs.append(hm1)
     hs = np.stack(hs, axis=1).reshape((batch_size*seq_len, self.HID_DIM))
     pred_out = layers.affine(hs, self.params['W_Softmax'], self.params['b_Softmax'])
     return pred_out.reshape((batch_size, seq_len, self.WORD_DIM))
Example #46
def l2_loss(x, label):
    """
    The Mean Square Error loss for regression.
    """
    N = x.shape[0]
    C = x.shape[1]
    if len(label.shape) == 1:
        #convert it to one hot encoding
        onehot_label = np.zeros([N, C])
        np.onehot_encode(label, onehot_label)
    else:
        onehot_label = label
    return np.sum((x - onehot_label) ** 2) / N
Example #47
 def preprocess(self, img):
     """ Preprocess a 210x160x3 uint8 frame into a 6400 (80x80) (1 x input_size) float vector."""
     # Crop, down-sample, erase background and set foreground to 1.
     # Ref: https://gist.github.com/karpathy/a4166c7fe253700972fcbc77e4ea32c5
     img = img[35:195]
     img = img[::2, ::2, 0]
     img[img == 144] = 0
     img[img == 109] = 0
     img[img != 0] = 1
     curr = np.expand_dims(img.astype(numpy.float).ravel(), axis=0)
     # Subtract the last preprocessed image.
     diff = curr - self.prev if self.prev is not None else np.zeros((1, curr.shape[1]))
     self.prev = curr
     return diff
Example #48
def main():
    X = np.random.randn(10,3,256,256)
    w = np.random.randn(8,3,3,3)
    b = np.zeros((8,))
    params = {
        'pad':1,
        'stride':2
    }

    start = time.time()

    conv_forward_naive(X,w,b,params)

    print(time.time() - start)
Example #49
    def train_loss(*args):
      inputs = args[0]
      softmax_label = args[1]
      probs = self.symbol_func(**self.make_mxnet_weight_dict(
          inputs, softmax_label, args[self.data_target_cnt:len(args)]))
      if softmax_label is None:
        return probs 

      samples_num = X.shape[0]
      targets = np.zeros((samples_num, self.num_classes))
      targets[np.arange(samples_num), softmax_label] = 1
      loss = -np.sum(targets * np.log(probs)) / samples_num
      for i in self.get_index_reg_weight():
        loss = loss + np.sum(0.5*args[i]**2*self.reg)

      return loss
Example #50
def test_op_statistics():

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    
    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))
    
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(
                preds, axis=1) - np.argmax(
                    targets, axis=1))
        return (256 - error) * 100 / 256.0
    
    np.record_op_stat()
    
    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5
    
    training_gradient_fun = grad(training_loss)
    
    for i in range(30):
        print('Trained accuracy #{}: {}%'.format(i, training_accuracy(weights,
                                                                      inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01
    
    # Print Op Statistics Info
    np.show_op_stat()
Example #51
def test_lr_grad():
    inputs = rng.rand(32, 64) * 0.1
    targets = np.zeros((32, 10))
    truth = rng.randint(0, 10, 32)
    targets[np.arange(32), truth] = 1
    
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    def training_loss(weights):
        preds = sigmoid(np.dot(inputs, weights))
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    weights = rng.rand(64, 10) * 0.01

    return gradient_checker.quick_grad_check(training_loss, weights, rs=rng)
Example #52
def test_mxnet_logistic():
    def sigmoid(x):
        return np.multiply(0.5, np.add(np.tanh(x), 1))

    xshape = (256, 500)
    # weight shape is reversed because of MXNet's FullyConnected convention
    wshape = (250, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = np.random.rand(*wshape) - 0.5

    x = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
    act = mx.sym.Activation(data=fc, act_type='sigmoid')

    f = core.Function(act, {'x': xshape})

    def predict(weights, inputs):
        #return f( data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
        return f(x=inputs, fc_weight=weights)

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    training_gradient_fun = core.grad(training_loss)

    print('Initial loss: {}'.format(training_loss(weights, inputs)))
    for i in range(100):
        gr = training_gradient_fun(weights, inputs)
        #print('Training gradient: {}'.format(gr))
        weights -= gr * 0.1
        if i % 10 == 0:
            print('Trained loss: {}'.format(training_loss(weights, inputs)))

    # The training loss should be around 300 in a bug-free Minpy
    if (training_loss(weights, inputs)[0] > 600):
        assert (False)
Example #53
def test_logistic():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(
                preds, axis=1) - np.argmax(
                    targets, axis=1))
        return (256 - error) * 100 / 256.0

    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5

    training_gradient_fun = grad(training_loss)

    for i in range(200):
        print('Trained accuracy #{}: {}%'.format(
            i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01

    # The accuracy should be 100 in bug-free MinPy
    if (training_accuracy(weights, inputs) < 95):
        assert (False)
Example #54
def test_slice():

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    def predict(weights, inputs):
        # Test Slice
        sliced_weights = weights[:, ::2]
        y = sigmoid(np.dot(inputs, sliced_weights))
        return y
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0
    
    xshape = (256, 500)
    # wshape = (500, 250)
    wshape = (500, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5
    
    training_gradient_fun = grad(training_loss)
    
    for i in range(20):
        print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        print('Gradient Size', gr.shape)
        print('Gradient example', gr[0,:10].asnumpy())
        weights -= gr * 0.01
Example #55
  def __init__(self, hidden_dims, input_dim=3*32*32, num_classes=10,
               dropout=0, use_batchnorm=False, reg=0.0,
               weight_scale=1e-2, seed=None, dtype=py_np.float64, conv_mode='lazy'):

    """
    Initialize a new FullyConnectedNet.
    
    Inputs:
    - hidden_dims: A list of integers giving the size of each hidden layer.
    - input_dim: An integer giving the size of the input.
    - num_classes: An integer giving the number of classes to classify.
    - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0 then
      the network should not use dropout at all.
    - use_batchnorm: Whether or not the network should use batch normalization.
    - reg: Scalar giving L2 regularization strength.
    - weight_scale: Scalar giving the standard deviation for random
      initialization of the weights.
    - seed: If not None, then pass this random seed to the dropout layers. This
      will make the dropout layers deterministic so we can gradient check the
      model.
    """
    super(FullyConnectedNet, self).__init__(conv_mode)
    self.use_batchnorm = use_batchnorm
    self.use_dropout = dropout > 0
    self.reg = reg
    self.num_layers = 1 + len(hidden_dims)
    self.params = {}

    # Define parameter names for a given layer index
    self.w_name = lambda l: 'W' + str(l)
    self.b_name = lambda l: 'b' + str(l)
    self.bn_ga_name = lambda l: 'bn_ga' + str(l)
    self.bn_bt_name = lambda l: 'bn_bt' + str(l)

    for l in range(self.num_layers):
      if l == 0:
        input_d = input_dim
      else:
        input_d = hidden_dims[l-1]

      if l < self.num_layers - 1:
        out_d = hidden_dims[l]
      else:
        out_d = num_classes

      self.params[self.w_name(l)] = random.randn(input_d, out_d) * weight_scale
      self.params[self.b_name(l)] = np.zeros((out_d))
      if l < self.num_layers and self.use_batchnorm:
        self.params[self.bn_ga_name(l)] = np.ones((out_d))
        self.params[self.bn_bt_name(l)] = np.zeros((out_d))

    self.param_keys = self.params.keys()

    # When using dropout we need to pass a dropout_param dictionary to each
    # dropout layer so that the layer knows the dropout probability and the mode
    # (train / test). You can pass the same dropout_param to each dropout layer.
    self.dropout_param = {}
    if self.use_dropout:
      self.dropout_param = {'mode': 'train', 'p': dropout}
      if seed is not None:
        self.dropout_param['seed'] = seed
    
    # With batch normalization we need to keep track of running means and
    # variances, so we need to pass a special bn_param object to each batch
    # normalization layer.
    self.bn_params = []
    if self.use_batchnorm:
      self.bn_params = [{'mode': 'train'} for i in xrange(self.num_layers - 1)]
    
    # Build key's index in loss func's arglist
    self.key_args_index = {}
    for i, key in enumerate(self.param_keys):
      # data, targets would be the first two elements in arglist
      self.key_args_index[key] = self.data_target_cnt + i

    # Init Key to index in loss_function args
    self.w_idx = self.wrap_param_idx(self.w_name)
    self.b_idx = self.wrap_param_idx(self.b_name)
    self.bn_ga_idx = self.wrap_param_idx(self.bn_ga_name)
    self.bn_bt_idx = self.wrap_param_idx(self.bn_bt_name)
Example #56
def training_loss(weights, inputs):
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l

def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0

xshape = (256, 500)
# wshape = (500, 250)
wshape = (500, 500)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5

training_gradient_fun = grad(training_loss)

def NumpyVarToMinpy(var):
  return minpy.array.Value.wrap(var)

def MinpyVarToNumpy(var):
  return minpy.array.Value.wrap(var).get_data(ArrayType.NUMPY)

for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
    gr = training_gradient_fun(weights, inputs)
Example #57
def batchnorm_forward(x, gamma, beta, bn_param):
  """
  Forward pass for batch normalization.
  
  During training the sample mean and (uncorrected) sample variance are
  computed from minibatch statistics and used to normalize the incoming data.
  During training we also keep an exponentially decaying running mean of the mean
  and variance of each feature, and these averages are used to normalize data
  at test-time.

  At each timestep we update the running averages for mean and variance using
  an exponential decay based on the momentum parameter:

  running_mean = momentum * running_mean + (1 - momentum) * sample_mean
  running_var = momentum * running_var + (1 - momentum) * sample_var

  Note that the batch normalization paper suggests a different test-time
  behavior: they compute sample mean and variance for each feature using a
  large number of training images rather than using a running average. For
  this implementation we have chosen to use running averages instead since
  they do not require an additional estimation step; the torch7 implementation
  of batch normalization also uses running averages.

  Input:
  - x: Data of shape (N, D)
  - gamma: Scale parameter of shape (D,)
  - beta: Shift parameter of shape (D,)
  - bn_param: Dictionary with the following keys:
    - mode: 'train' or 'test'; required
    - eps: Constant for numeric stability
    - momentum: Constant for running mean / variance.
    - running_mean: Array of shape (D,) giving running mean of features
    - running_var: Array of shape (D,) giving running variance of features

  Returns a tuple of:
  - out: of shape (N, D)
  - cache: A tuple of values needed in the backward pass
  """
  mode = bn_param['mode']
  eps = bn_param.get('eps', 1e-5)
  momentum = bn_param.get('momentum', 0.9)

  N, D = x.shape
  running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
  running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

  out, cache = None, None
  if mode == 'train':
    #############################################################################
    # TODO: Implement the training-time forward pass for batch normalization.   #
    # Use minibatch statistics to compute the mean and variance, use these      #
    # statistics to normalize the incoming data, and scale and shift the        #
    # normalized data using gamma and beta.                                     #
    #                                                                           #
    # You should store the output in the variable out. Any intermediates that   #
    # you need for the backward pass should be stored in the cache variable.    #
    #                                                                           #
    # You should also use your computed sample mean and variance together with  #
    # the momentum variable to update the running mean and running variance,    #
    # storing your result in the running_mean and running_var variables.        #
    #############################################################################
    pass
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
  elif mode == 'test':
    #############################################################################
    # TODO: Implement the test-time forward pass for batch normalization. Use   #
    # the running mean and variance to normalize the incoming data, then scale  #
    # and shift the normalized data using gamma and beta. Store the result in   #
    # the out variable.                                                         #
    #############################################################################
    pass
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
  else:
    raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

  # Store the updated running means back into bn_param
  bn_param['running_mean'] = running_mean
  bn_param['running_var'] = running_var

  return out, cache
Example #58
def batchnorm_forward(x, gamma, beta, bn_param):
  """
  Forward pass for batch normalization.
  
  During training the sample mean and (uncorrected) sample variance are
  computed from minibatch statistics and used to normalize the incoming data.
  During training we also keep an exponentially decaying running mean of the mean
  and variance of each feature, and these averages are used to normalize data
  at test-time.

  At each timestep we update the running averages for mean and variance using
  an exponential decay based on the momentum parameter:

  running_mean = momentum * running_mean + (1 - momentum) * sample_mean
  running_var = momentum * running_var + (1 - momentum) * sample_var

  Note that the batch normalization paper suggests a different test-time
  behavior: they compute sample mean and variance for each feature using a
  large number of training images rather than using a running average. For
  this implementation we have chosen to use running averages instead since
  they do not require an additional estimation step; the torch7 implementation
  of batch normalization also uses running averages.

  Input:
  - x: Data of shape (N, D)
  - gamma: Scale parameter of shape (D,)
  - beta: Shift parameter of shape (D,)
  - bn_param: Dictionary with the following keys:
    - mode: 'train' or 'test'; required
    - eps: Constant for numeric stability
    - momentum: Constant for running mean / variance.
    - running_mean: Array of shape (D,) giving running mean of features
    - running_var: Array of shape (D,) giving running variance of features

  Returns:
  - out: of shape (N, D)
  """
  mode = bn_param['mode']
  eps = bn_param.get('eps', 1e-5)
  momentum = bn_param.get('momentum', 0.9)

  N, D = x.shape
  running_mean = bn_param.get('running_mean', np.zeros(D))
  running_var = bn_param.get('running_var', np.zeros(D))

  out = None
  if mode == 'train':
    mean = np.sum(x, axis = 0)/float(N)
    x_mean = (x - mean)

    sqr_x_mean = x_mean ** 2
    var = np.sum(sqr_x_mean, axis = 0)/float(N)
    sqrt_var = np.sqrt(var + eps)

    inv_sqrt_var = 1.0/sqrt_var

    x_hat = x_mean * inv_sqrt_var
    out = gamma * x_hat + beta

    running_mean = momentum*running_mean + (1.0 - momentum) * mean
    running_var = momentum*running_var + (1.0 - momentum) * var
  elif mode == 'test':
    x_hat = (x - running_mean)/np.sqrt(running_var + eps)
    out = gamma * x_hat + beta
  else:
    raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

  # Store the updated running means back into bn_param
  bn_param['running_mean'] = running_mean
  bn_param['running_var'] = running_var
 
  return out
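A quick train-mode sanity check with made-up data; after normalization each feature of the output should have mean close to beta and standard deviation close to gamma:

x = np.random.randn(100, 3) * 5.0 + 2.0
bn_param = {'mode': 'train'}
out = batchnorm_forward(x, gamma=np.ones(3), beta=np.zeros(3), bn_param=bn_param)
print(out.mean(axis=0))   # close to 0
print(out.std(axis=0))    # close to 1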
Example #59
def training_loss(weights, bias, inputs):
    preds = predict(weights, bias, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l

def training_accuracy(weights, bias, inputs):
    preds = predict(weights, bias, inputs)
    error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0

xshape = (256, 500)
wshape = (500, 250)
bshape = (250)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5
#bias = random.rand(bshape) - 0.5
#print bias.shape
bias = np.zeros(bshape)
print(bias.shape)

training_gradient_fun = grad(training_loss)

for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, bias, inputs)))
    gr = training_gradient_fun(weights, bias, inputs)
    weights -= gr * 0.01