Example #1
 def feedforward(self, X, C):
     """Run feedforward for this layer.
     """
     # Cleanup debris from any previous feedforward
     self._cleanup()
     assert ((self.bias_dim >= 5) or (self.source_dim >= 5))
     # Record the incoming list of row indices to extract
     self.X = X
     self.C = C.astype(np.uint32)
     # Extract the relevant bias parameter rows
     Wb = self.params['Wb'].take(C, axis=0)
     if (self.bias_dim < 5):
         # No context-adaptive bias term should be applied if self.bias_dim
         # is < 5. I.e. only information coming up from the word LUT, and
         # possibly rescaled by this layer, should be used in prediction.
         Wb = zeros(Wb.shape)
     # Get the feature re-weighting and bias adjustment parameters
     if self.do_rescale:
         Wm = self.params['Wm'].take(C, axis=0)
         self.Wm_exp = ne.evaluate('exp(Wm)', optimization='aggressive')
         self.Wm_sig = self.Wm_exp / (1.0 + self.Wm_exp)
         if (self.source_dim < 5):
             # Information from the word LUT should not pass through this
             # layer. When source_dim < 5, we assume that we are meant to
             # do prediction using only the context-adaptive biases.
             self.Wm_exp = ones(Wm.shape)
             self.Wm_sig = zeros(Wm.shape)
     else:
         self.Wm_sig = ones(X.shape)
     # Build Y by prepending the per-context bias columns to the rescaled features
     self.Y = np.hstack((Wb, (X * self.Wm_sig)))
     return self.Y
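The body above selects per-context bias rows, squashes the gate parameters through a sigmoid, and stacks the bias next to the re-weighted features. A minimal, self-contained NumPy sketch of that core transform follows; every name in it is illustrative and not part of the original class.

    # Minimal sketch of the context-adaptive bias/rescale transform above.
    # Hypothetical standalone names; the original uses self.params['Wb'],
    # self.params['Wm'] and caches intermediates on the layer object.
    import numpy as np

    def context_adapt(X, C, Wb, Wm):
        b = Wb.take(C, axis=0)                          # per-context bias rows
        g = 1.0 / (1.0 + np.exp(-Wm.take(C, axis=0)))   # sigmoid feature gates
        return np.hstack((b, X * g))                    # [bias | re-weighted X]

    rng = np.random.RandomState(0)
    X = rng.randn(4, 6).astype(np.float32)          # 4 inputs, source_dim = 6
    C = np.array([0, 2, 1, 2], dtype=np.uint32)     # one context index per row
    Wb = rng.randn(3, 5)                            # 3 contexts, bias_dim = 5
    Wm = rng.randn(3, 6)                            # 3 contexts, source_dim = 6
    print(context_adapt(X, C, Wb, Wm).shape)        # -> (4, 11)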
Example #2
 def init_params(self, w_scale=0.01, b_scale=0.0):
     """Randomly initialize the weights in this layer."""
     self.params['W'] = w_scale * randn((self.key_count, self.dim_input))
     self.grads['W'] = zeros((self.key_count, self.dim_input))
     self.params['b'] = zeros((self.key_count, ))
     self.grads['b'] = zeros((self.key_count, ))
     return
Example #3
 def init_params(self, w_scale=0.01, b_scale=0.0):
     """Randomly initialize the weights in this layer."""
     self.params['W'] = w_scale * randn((self.key_count, self.dim_input))
     self.grads['W'] = zeros((self.key_count, self.dim_input))
     self.params['b'] = zeros((self.key_count,))
     self.grads['b'] = zeros((self.key_count,))
     return
Example #4
    def ff_bp(self, X, code_keys, code_signs, do_grad=True):
        """Perform feedforward and then backprop for this layer.

        By setting do_grad to False, we can just compute the loss, without
        making modifications to the gradient accumulators (i.e. no backprop).
        """
        # check array types, to avoid "silent" type errors in Cython code
        assert (type(X[0, 0]) == np.float32)
        assert (type(code_keys[0, 0]) == np.uint32)
        assert (type(code_signs[0, 0]) == np.float32)
        # check for valid input shapes
        assert (X.shape[1] == self.params['W'].shape[1])
        assert (code_keys.shape[0] == X.shape[0])
        assert (code_signs.shape[0] == X.shape[0])
        # cleanup debris from any previous feedforward
        self._cleanup()
        # change from boolean to int, for Cython code
        do_grad = 1 if do_grad else 0
        # do feedforward and backprop all in one go
        dLdX = zeros(X.shape)
        L_cy = zeros(code_keys.shape)
        hsm_ff_bp(code_keys, code_signs, X, self.params['W'], self.params['b'], \
                  dLdX, self.grads['W'], self.grads['b'], L_cy, do_grad)
        # Sum the per-code losses into a single scalar loss for the batch
        L = np.sum(L_cy)
        if do_grad:
            if len(self.grad_idx) == 0:
                self.grad_idx = np.unique(code_keys)
            else:
                self.grad_idx = np.unique(np.concatenate( \
                        (self.grad_idx, np.unique(code_keys)) ))
        return [dLdX, L]
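hsm_ff_bp is an external Cython kernel that is not shown in these examples. As a rough sketch only, assuming it implements the standard hierarchical-softmax logistic loss over (code key, code sign) pairs, the loss and input gradient it accumulates could be written in plain NumPy as below; this illustrates the usual formulation, not the actual kernel.

    # Hedged NumPy sketch: for each (input, code node) pair with target sign
    # s in {+1, -1}, the logistic loss is log(1 + exp(-s * (x.w + b))).
    import numpy as np

    def hsm_loss_and_grad(X, W, b, code_keys, code_signs):
        Wc = W.take(code_keys, axis=0)                   # (n, n_codes, dim)
        bc = b.take(code_keys)                           # (n, n_codes)
        scores = np.einsum('nd,ncd->nc', X, Wc) + bc     # per-code scores
        z = -code_signs * scores
        L = np.sum(np.log1p(np.exp(z)))                  # summed logistic loss
        dLdscore = -code_signs / (1.0 + np.exp(-z))      # = -s * sigmoid(z)
        dLdX = np.einsum('nc,ncd->nd', dLdscore, Wc)     # gradient w.r.t. X
        return dLdX, L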
Example #5
    def ff_bp(self, X, code_keys, code_signs, do_grad=True):
        """Perform feedforward and then backprop for this layer.

        By setting do_grad to False, we can just compute the loss, without
        making modifications to the gradient accumulators (i.e. no backprop).
        """
        # check array types, to avoid "silent" type errors in Cython code
        assert(type(X[0,0]) == np.float32)
        assert(type(code_keys[0,0]) == np.uint32)
        assert(type(code_signs[0,0]) == np.float32)
        # check for valid input shapes
        assert(X.shape[1] == self.params['W'].shape[1])
        assert(code_keys.shape[0] == X.shape[0])
        assert(code_signs.shape[0] == X.shape[0])
        # cleanup debris from any previous feedforward
        self._cleanup()
        # change from boolean to int, for Cython code
        do_grad = 1 if do_grad else 0
        # do feedforward and backprop all in one go
        dLdX = zeros(X.shape)
        L_cy = zeros(code_keys.shape)
        hsm_ff_bp(code_keys, code_signs, X, self.params['W'], self.params['b'], \
                  dLdX, self.grads['W'], self.grads['b'], L_cy, do_grad)
        # Sum the per-code losses into a single scalar loss for the batch
        L = np.sum(L_cy)
        if do_grad:
            if len(self.grad_idx) == 0:
                self.grad_idx = np.unique(code_keys)
            else:
                self.grad_idx = np.unique(np.concatenate( \
                        (self.grad_idx, np.unique(code_keys)) ))
        return [dLdX, L]
Example #6
 def init_params(self, w_scale=0.01):
     """Randomly initialize the weights in this layer."""
     self.params['Wm'] = w_scale * randn((self.key_count, self.source_dim))
     self.grads['Wm'] = zeros(self.params['Wm'].shape)
     self.params['Wb'] = w_scale * randn((self.key_count, self.bias_dim))
     self.grads['Wb'] = zeros(self.params['Wb'].shape)
     return
Example #7
 def init_params(self, w_scale=0.01):
     """Randomly initialize the weights in this layer."""
     self.params['Wm'] = w_scale * randn((self.key_count, self.source_dim))
     self.grads['Wm'] = zeros(self.params['Wm'].shape)
     self.params['Wb'] = w_scale * randn((self.key_count, self.bias_dim))
     self.grads['Wb'] = zeros(self.params['Wb'].shape)
     return
Example #8
 def feedforward(self, X, C):
     """Run feedforward for this layer.
     """
     # Cleanup debris from any previous feedforward
     self._cleanup()
     assert ((self.bias_dim >= 5) or (self.source_dim >= 5))
     # Record the incoming list of row indices to extract
     self.X = X
     self.C = C.astype(np.uint32)
     # Extract the relevant bias parameter rows
     Wb = self.params['Wb'].take(C, axis=0)
     if (self.bias_dim < 5):
         # No context-adaptive bias term should be applied if self.bias_dim
         # is < 5. I.e. only information coming up from the word LUT, and
         # possibly rescaled by this layer, should be used in prediction.
         Wb = zeros(Wb.shape)
     # Get the feature re-weighting and bias adjustment parameters
     if self.do_rescale:
         Wm = self.params['Wm'].take(C, axis=0)
         self.Wm_exp = ne.evaluate('exp(Wm)', optimization='aggressive')
         self.Wm_sig = self.Wm_exp / (1.0 + self.Wm_exp)
         if (self.source_dim < 5):
             # Information from the word LUT should not pass through this
             # layer. When source_dim < 5, we assume that we are meant to
             # do prediction using only the context-adaptive biases.
             self.Wm_exp = ones(Wm.shape)
             self.Wm_sig = zeros(Wm.shape)
     else:
         self.Wm_sig = ones(X.shape)
     # Build Y by prepending the per-context bias columns to the rescaled features
     self.Y = np.hstack((Wb, (X * self.Wm_sig)))
     return self.Y
Example #9
 def init_params(self, w_scale=0.01, param='Wb'):
     """Randomly initialize the weights in this layer."""
     assert((param == 'Wb') or (param == 'Wm'))
     if param == 'Wm':
         self.params['Wm'] = w_scale * randn((self.key_count, self.source_dim))
         self.grads['Wm'] = zeros(self.params['Wm'].shape)
     else:
         self.params['Wb'] = w_scale * randn((self.key_count, self.bias_dim))
         self.grads['Wb'] = zeros(self.params['Wb'].shape)
     return
Example #10
 def __init__(self, max_key, embed_dim, n_gram=1):
     # Set stuff for managing this type of layer
     self.key_count = max_key + 1  # add 1 to accommodate 0 indexing
     self.params = {}
     self.params['W'] = 0.01 * randn((self.key_count, embed_dim))
     self.grads = {}
     self.grads['W'] = zeros(self.params['W'].shape)
     self.moms = {}
     self.moms['W'] = zeros(self.params['W'].shape)
     self.grad_idx = set()
     self.embed_dim = embed_dim
     self.n_gram = n_gram
     self.X = []
     self.Y = []
     return
Example #11
 def __init__(self, max_key, embed_dim, n_gram=1):
     # Set stuff for managing this type of layer
     self.key_count = max_key + 1 # add 1 to accommodate 0 indexing
     self.params = {}
     self.params['W'] = 0.01 * randn((self.key_count, embed_dim))
     self.grads = {}
     self.grads['W'] = zeros(self.params['W'].shape)
     self.moms = {}
     self.moms['W'] = zeros(self.params['W'].shape)
     self.grad_idx = set()
     self.embed_dim = embed_dim
     self.n_gram = n_gram
     self.X = []
     self.Y = []
     return
Example #12
 def batch_train(self, anc_idx, pos_idx, neg_idx, learn_rate=1e-3):
     """Perform a batch update of all parameters based on the given sets
     of anchor, positive example, and negative example indices.
     """
     # Force incoming LUT indices to the right type (i.e. np.uint32)
     anc_idx = anc_idx.astype(np.uint32)
     pos_idx = pos_idx[:,np.newaxis]
     pn_idx = np.hstack((pos_idx, neg_idx)).astype(np.uint32)
     pn_sign = -1.0 * ones(pn_idx.shape)
     pn_sign[:,0] = 1.0
     L = zeros((1,))
     # Do feedforward and backprop through the predictor/predictee tables
     w2v_ff_bp(anc_idx, pn_idx, pn_sign, self.params['Wa'], \
               self.params['Wc'], self.params['b'], self.grads['Wa'], \
               self.grads['Wc'], self.grads['b'], L, 1)
     L = L[0]
     # Apply gradients to (touched only) look-up-table parameters
     a_mod_idx = np.unique(anc_idx)
     c_mod_idx = np.unique(pn_idx)
     ag_update_2d(a_mod_idx, self.params['Wa'], self.grads['Wa'], \
             self.moms['Wa'], learn_rate)
     ag_update_2d(c_mod_idx, self.params['Wc'], self.grads['Wc'], \
             self.moms['Wc'], learn_rate)
     ag_update_1d(c_mod_idx, self.params['b'], self.grads['b'], \
             self.moms['b'], learn_rate)
     return L
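ag_update_2d and ag_update_1d are also external Cython kernels. Based on the comments elsewhere in these examples describing self.moms as adagrad "momentums" (i.e. sums of squared gradients), a hedged sketch of the sparse per-row update they presumably perform, touching only the listed rows, is:

    # Illustrative sparse adagrad row update; not the actual Cython kernel.
    import numpy as np

    def adagrad_update_rows(row_idx, params, grads, moms, learn_rate, eps=1e-8):
        for i in row_idx:
            moms[i] += grads[i] ** 2                 # accumulate squared grads
            params[i] -= learn_rate * grads[i] / (np.sqrt(moms[i]) + eps)
            grads[i] = 0.0   # presumably cleared so accumulation restarts per batch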
Example #13
 def batch_train(self, anc_idx, pos_idx, neg_idx, learn_rate=1e-3):
     """Perform a batch update of all parameters based on the given sets
     of anchor, positive example, and negative example indices.
     """
     # Force incoming LUT indices to the right type (i.e. np.uint32)
     anc_idx = anc_idx.astype(np.uint32)
     pos_idx = pos_idx[:, np.newaxis]
     pn_idx = np.hstack((pos_idx, neg_idx)).astype(np.uint32)
     pn_sign = -1.0 * ones(pn_idx.shape)
     pn_sign[:, 0] = 1.0
     L = zeros((1, ))
     # Do feedforward and backprop through the predictor/predictee tables
     w2v_ff_bp(anc_idx, pn_idx, pn_sign, self.params['Wa'], \
               self.params['Wc'], self.params['b'], self.grads['Wa'], \
               self.grads['Wc'], self.grads['b'], L, 1)
     L = L[0]
     # Apply gradients to (touched only) look-up-table parameters
     a_mod_idx = np.unique(anc_idx)
     c_mod_idx = np.unique(pn_idx)
     ag_update_2d(a_mod_idx, self.params['Wa'], self.grads['Wa'], \
             self.moms['Wa'], learn_rate)
     ag_update_2d(c_mod_idx, self.params['Wc'], self.grads['Wc'], \
             self.moms['Wc'], learn_rate)
     ag_update_1d(c_mod_idx, self.params['b'], self.grads['b'], \
             self.moms['b'], learn_rate)
     return L
Example #14
 def __init__(self, max_word_key=0, word_dim=0, lam_l2=1e-3):
     # Set basic layer parameters. The max_word_key passed as an argument
     # is incremented by 1 to accommodate 0 indexing.
     self.word_dim = word_dim
     self.word_count = max_word_key + 1
     # Initialize arrays for tracking parameters, gradients, and
     # adagrad "momentums" (i.e. sums of squared gradients).
     self.params = {}
     self.params['Wa'] = 0.01 * randn((self.word_count, word_dim))
     self.params['Wc'] = 0.01 * randn((self.word_count, word_dim))
     self.params['b'] = zeros((self.word_count,))
     self.grads = {}
     self.grads['Wa'] = zeros((self.word_count, word_dim))
     self.grads['Wc'] = zeros((self.word_count, word_dim))
     self.grads['b'] = zeros((self.word_count,))
     self.moms = {}
     self.moms['Wa'] = zeros((self.word_count, word_dim))
     self.moms['Wc'] = zeros((self.word_count, word_dim))
     self.moms['b'] = zeros((self.word_count,))
     # Set l2 regularization parameter
     self.lam_l2 = lam_l2
     # Initialize sets for tracking which words we have trained
     self.trained_Wa = set()
     self.trained_Wc = set()
     return
Example #15
 def __init__(self, max_word_key=0, word_dim=0, lam_l2=1e-3):
     # Set basic layer parameters. The max_word_key passed as an argument
     # is incremented by 1 to accommodate 0 indexing.
     self.word_dim = word_dim
     self.word_count = max_word_key + 1
     # Initialize arrays for tracking parameters, gradients, and
     # adagrad "momentums" (i.e. sums of squared gradients).
     self.params = {}
     self.params['Wa'] = 0.01 * randn((self.word_count, word_dim))
     self.params['Wc'] = 0.01 * randn((self.word_count, word_dim))
     self.params['b'] = zeros((self.word_count, ))
     self.grads = {}
     self.grads['Wa'] = zeros((self.word_count, word_dim))
     self.grads['Wc'] = zeros((self.word_count, word_dim))
     self.grads['b'] = zeros((self.word_count, ))
     self.moms = {}
     self.moms['Wa'] = zeros((self.word_count, word_dim))
     self.moms['Wc'] = zeros((self.word_count, word_dim))
     self.moms['b'] = zeros((self.word_count, ))
     # Set l2 regularization parameter
     self.lam_l2 = lam_l2
     # Initialize sets for tracking which words we have trained
     self.trained_Wa = set()
     self.trained_Wc = set()
     return
Example #16
 def __init__(self, in_dim=0, max_hs_key=0):
     # Record and initialize some layer parameters
     self.dim_input = in_dim
     self.key_count = max_hs_key + 1 # assume 0 is a key
     self.params = {}
     self.params['W'] = 0.01 * randn((self.key_count, in_dim))
     self.params['b'] = zeros((self.key_count,))
     self.grads = {}
     self.grads['W'] = zeros((self.key_count, in_dim))
     self.grads['b'] = zeros((self.key_count,))
     self.moms = {}
     self.moms['W'] = zeros((self.key_count, in_dim))
     self.moms['b'] = zeros((self.key_count,))
     # Set temp vars to use in feedforward/backprop
     self.X = []
     self.Y = []
     self.dLdX = []
     self.dLdY = []
     self.grad_idx = []
     return
Example #17
 def __init__(self, in_dim=0, max_hs_key=0):
     # Record and initialize some layer parameters
     self.dim_input = in_dim
     self.key_count = max_hs_key + 1  # assume 0 is a key
     self.params = {}
     self.params['W'] = 0.01 * randn((self.key_count, in_dim))
     self.params['b'] = zeros((self.key_count, ))
     self.grads = {}
     self.grads['W'] = zeros((self.key_count, in_dim))
     self.grads['b'] = zeros((self.key_count, ))
     self.moms = {}
     self.moms['W'] = zeros((self.key_count, in_dim))
     self.moms['b'] = zeros((self.key_count, ))
     # Set temp vars to use in feedforward/backprop
     self.X = []
     self.Y = []
     self.dLdX = []
     self.dLdY = []
     self.grad_idx = []
     return
Example #18
 def xent_loss_and_grad(self, Yh, Y_cat):
     """Cross-entropy loss for predictions Yh given targets Y_cat."""
     # Convert from categorical classes to "one-hot" target vectors
     Y_ind = zeros(Yh.shape)
     Y_ind[np.arange(Y_ind.shape[0]), Y_cat] = 1.0
     # Push one-hot target vectors to the GPU
     Y_ind = gp.garray(Y_ind)
     # Compute softmax and then cross-entropy loss
     Yh_sm = self.safe_softmax(Yh)
     L = -gp.sum((Y_ind * gp.log(Yh_sm)))
     dLdYh = Yh_sm - Y_ind
     return [L, dLdYh]
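The method above runs on the GPU through gnumpy (gp). For reference, the same softmax plus cross-entropy computation in plain NumPy, with a max-shift standing in for the class's own safe_softmax, might look like:

    # CPU-only sketch mirroring xent_loss_and_grad: one-hot targets, a
    # numerically stabilized softmax, summed cross-entropy, and its gradient
    # with respect to the raw scores.
    import numpy as np

    def xent_loss_and_grad_np(Yh, Y_cat):
        Y_ind = np.zeros(Yh.shape)
        Y_ind[np.arange(Yh.shape[0]), Y_cat] = 1.0
        Yh_shift = Yh - Yh.max(axis=1, keepdims=True)   # stabilize exp()
        Yh_sm = np.exp(Yh_shift)
        Yh_sm /= Yh_sm.sum(axis=1, keepdims=True)
        L = -np.sum(Y_ind * np.log(Yh_sm + 1e-30))
        dLdYh = Yh_sm - Y_ind
        return [L, dLdYh]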
Example #19
 def __init__(self, max_key=0, source_dim=0, bias_dim=0, do_rescale=False):
     # Set stuff for managing this type of layer
     self.key_count = max_key + 1  # add 1 to accommodate 0 indexing
     self.source_dim = source_dim
     self.bias_dim = bias_dim
     self.do_rescale = do_rescale  # True: re-weight LUT features with per-context sigmoid gates
     self.params = {}
     self.params['Wm'] = zeros((self.key_count, source_dim))
     self.params['Wb'] = zeros((self.key_count, bias_dim))
     self.grads = {}
     self.grads['Wm'] = zeros(self.params['Wm'].shape)
     self.grads['Wb'] = zeros(self.params['Wb'].shape)
     self.moms = {}
     self.moms['Wm'] = zeros(self.params['Wm'].shape)
     self.moms['Wb'] = zeros(self.params['Wb'].shape)
     self.grad_idx = set()
     # Set common stuff shared by all layer types
     self.X = []
     self.C = []
     self.Wm_exp = []
     self.Wm_sig = []
     self.Y = []
     self.dLdX = []
     self.dLdY = []
     return
Example #20
 def __init__(self, max_key=0, source_dim=0, bias_dim=0, do_rescale=False):
     # Set stuff for managing this type of layer
     self.key_count = max_key + 1 # add 1 to accommodate 0 indexing
     self.source_dim = source_dim
     self.bias_dim = bias_dim
     self.do_rescale = do_rescale # True: re-weight LUT features with per-context sigmoid gates
     self.params = {}
     self.params['Wm'] = zeros((self.key_count, source_dim))
     self.params['Wb'] = zeros((self.key_count, bias_dim))
     self.grads = {}
     self.grads['Wm'] = zeros(self.params['Wm'].shape)
     self.grads['Wb'] = zeros(self.params['Wb'].shape)
     self.moms = {}
     self.moms['Wm'] = zeros(self.params['Wm'].shape)
     self.moms['Wb'] = zeros(self.params['Wb'].shape)
     self.grad_idx = set()
     # Set common stuff shared by all layer types
     self.X = []
     self.C = []
     self.Wm_exp = []
     self.Wm_sig = []
     self.Y = []
     self.dLdX = []
     self.dLdY = []
     return
Example #21
 def init_params(self, w_scale=0.01, b_scale=0.0):
     """Randomly initialize the weights in this layer."""
     self.params['Wa'] = w_scale * randn((self.word_count, self.word_dim))
     self.grads['Wa'] = zeros((self.word_count, self.word_dim))
     self.moms['Wa'] = zeros((self.word_count, self.word_dim)) + 1e-3
     self.params['Wc'] = w_scale * randn((self.word_count, self.word_dim))
     self.grads['Wc'] = zeros((self.word_count, self.word_dim))
     self.moms['Wc'] = zeros((self.word_count, self.word_dim)) + 1e-3
     self.params['b'] = zeros((self.word_count,))
     self.grads['b'] = zeros((self.word_count,))
     self.moms['b'] = zeros((self.word_count,)) + 1e-3
     return
Example #22
 def init_params(self, w_scale=0.01, b_scale=0.0):
     """Randomly initialize the weights in this layer."""
     self.params['Wa'] = w_scale * randn((self.word_count, self.word_dim))
     self.grads['Wa'] = zeros((self.word_count, self.word_dim))
     self.moms['Wa'] = zeros((self.word_count, self.word_dim)) + 1e-3
     self.params['Wc'] = w_scale * randn((self.word_count, self.word_dim))
     self.grads['Wc'] = zeros((self.word_count, self.word_dim))
     self.moms['Wc'] = zeros((self.word_count, self.word_dim)) + 1e-3
     self.params['b'] = zeros((self.word_count, ))
     self.grads['b'] = zeros((self.word_count, ))
     self.moms['b'] = zeros((self.word_count, )) + 1e-3
     return
Example #23
 def ff_bp(self, X, pos_samples, neg_samples, do_grad=True):
     """Perform feedforward and then backprop for this layer."""
     # check array types, to avoid "silent" type errors in Cython code
     assert (type(X[0, 0]) == np.float32)
     assert (type(pos_samples[0]) == np.uint32)
     assert (type(neg_samples[0, 0]) == np.uint32)
     # check for valid input shapes
     assert (X.shape[1] == self.params['W'].shape[1])
     assert (pos_samples.shape[0] == X.shape[0])
     assert (neg_samples.shape[0] == X.shape[0])
     # check that requested target keys are all valid
     assert (np.max(pos_samples) < self.key_count)
     assert (np.max(neg_samples) < self.key_count)
     # cleanup debris from any previous feedforward
     self._cleanup()
     # change from boolean to int, for Cython code
     do_grad = 1 if do_grad else 0
     # record inputs and keys for positive/negative examples
     pos_samples = pos_samples[:, np.newaxis]
     samp_keys = np.hstack((pos_samples, neg_samples))
     samp_sign = -1.0 * ones(samp_keys.shape)
     samp_sign[:, 0] = 1.0
     # do feedforward and backprop all in one go
     L = zeros(samp_keys.shape)
     dLdX = zeros(X.shape)
     nsl_ff_bp(samp_keys, samp_sign, X, self.params['W'], self.params['b'], \
               dLdX, self.grads['W'], self.grads['b'], L, do_grad)
     # Sum the per-key losses into a single scalar loss for the batch
     L = np.sum(L)
     if do_grad:
         if len(self.grad_idx) == 0:
             self.grad_idx = np.unique(samp_keys)
         else:
             self.grad_idx = np.unique(np.concatenate( \
                     (self.grad_idx, np.unique(samp_keys)) ))
     return [dLdX, L]
Example #24
 def ff_bp(self, X, pos_samples, neg_samples, do_grad=True):
     """Perform feedforward and then backprop for this layer."""
     # check array types, to avoid "silent" type errors in Cython code
     assert(type(X[0,0]) == np.float32)
     assert(type(pos_samples[0]) == np.uint32)
     assert(type(neg_samples[0,0]) == np.uint32)
     # check for valid input shapes
     assert(X.shape[1] == self.params['W'].shape[1])
     assert(pos_samples.shape[0] == X.shape[0])
     assert(neg_samples.shape[0] == X.shape[0])
     # check that requested target keys are all valid
     assert(np.max(pos_samples) < self.key_count)
     assert(np.max(neg_samples) < self.key_count)
     # cleanup debris from any previous feedforward
     self._cleanup()
     # change from boolean to int, for Cython code
     do_grad = 1 if do_grad else 0
     # record inputs and keys for positive/negative examples
     pos_samples = pos_samples[:,np.newaxis]
     samp_keys = np.hstack((pos_samples, neg_samples))
     samp_sign = -1.0 * ones(samp_keys.shape)
     samp_sign[:,0] = 1.0
     # do feedforward and backprop all in one go
     L = zeros(samp_keys.shape)
     dLdX = zeros(X.shape)
     nsl_ff_bp(samp_keys, samp_sign, X, self.params['W'], self.params['b'], \
               dLdX, self.grads['W'], self.grads['b'], L, do_grad)
     # Sum the per-key losses into a single scalar loss for the batch
     L = np.sum(L)
     if do_grad:
         if len(self.grad_idx) == 0:
             self.grad_idx = np.unique(samp_keys)
         else:
             self.grad_idx = np.unique(np.concatenate( \
                     (self.grad_idx, np.unique(samp_keys)) ))
     return [dLdX, L]
Example #25
 def batch_test(self, anc_idx, pos_idx, neg_idx):
     """Run a batch through the model, computing losses but not grads.
     """
     anc_idx = anc_idx.astype(np.uint32)
     pos_idx = pos_idx[:, np.newaxis]
     pn_idx = np.hstack((pos_idx, neg_idx)).astype(np.uint32)
     # Positive examples get sign +1 and negatives -1, matching batch_train
     pn_sign = -1.0 * ones(pn_idx.shape)
     pn_sign[:, 0] = 1.0
     L = zeros((1, ))
     # Do feedforward and backprop through the predictor/predictee tables
     w2v_ff_bp(anc_idx, pn_idx, pn_sign, self.params['Wa'], \
            self.params['Wc'], self.params['b'], self.grads['Wa'], \
            self.grads['Wc'], self.grads['b'], L, 0)
     self.grads['Wa'] = 0.0 * self.grads['Wa']
     self.grads['Wc'] = 0.0 * self.grads['Wc']
     self.grads['b'] = 0.0 * self.grads['b']
     L = L[0]
     return L
Example #26
 def batch_test(self, anc_idx, pos_idx, neg_idx):
     """Run a batch through the model, computing losses but not grads.
     """
     anc_idx = anc_idx.astype(np.uint32)
     pos_idx = pos_idx[:,np.newaxis]
     pn_idx = np.hstack((pos_idx, neg_idx)).astype(np.uint32)
     # Positive examples get sign +1 and negatives -1, matching batch_train
     pn_sign = -1.0 * ones(pn_idx.shape)
     pn_sign[:,0] = 1.0
     L = zeros((1,))
     # Do feedforward and backprop through the predictor/predictee tables
     w2v_ff_bp(anc_idx, pn_idx, pn_sign, self.params['Wa'], \
            self.params['Wc'], self.params['b'], self.grads['Wa'], \
            self.grads['Wc'], self.grads['b'], L, 0)
     self.grads['Wa'] = 0.0 * self.grads['Wa']
     self.grads['Wc'] = 0.0 * self.grads['Wc']
     self.grads['b'] = 0.0 * self.grads['b']
     L = L[0]
     return L
Example #27
    def feedforward(self, X):
        """Run feedforward for this layer.

        The input X should be a single array of integer indices into the
        look-up table when n_gram == 1, or an array with one column of LUT
        indices per gram when n_gram > 1.
        """
        # Cleanup debris from any previous feedforward
        self._cleanup()
        # Record the incoming list of row indices to extract
        self.X = X.astype(np.uint32)
        # Use look-up table to generate the desired sequences
        if (self.n_gram == 1):
            self.Y = self.params['W'].take(self.X, axis=0)
        else:
            self.Y = zeros((self.X.shape[0], (self.n_gram * self.embed_dim)))
            for i in range(self.n_gram):
                s_idx = i * self.embed_dim
                e_idx = s_idx + self.embed_dim
                self.Y[:,s_idx:e_idx] = self.params['W'].take(self.X[:,i], axis=0)
        return self.Y
Example #28
    def feedforward(self, X):
        """Run feedforward for this layer.

        The input X should be a single array of integer indices into the
        look-up table when n_gram == 1, or an array with one column of LUT
        indices per gram when n_gram > 1.
        """
        # Cleanup debris from any previous feedforward
        self._cleanup()
        # Record the incoming list of row indices to extract
        self.X = X.astype(np.uint32)
        # Use look-up table to generate the desired sequences
        if (self.n_gram == 1):
            self.Y = self.params['W'].take(self.X, axis=0)
        else:
            self.Y = zeros((self.X.shape[0], (self.n_gram * self.embed_dim)))
            for i in range(self.n_gram):
                s_idx = i * self.embed_dim
                e_idx = s_idx + self.embed_dim
                self.Y[:, s_idx:e_idx] = self.params['W'].take(self.X[:, i],
                                                               axis=0)
        return self.Y
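When n_gram > 1, each column of X indexes the look-up table separately and the per-position embeddings are concatenated along the feature axis. A tiny, self-contained illustration with toy values (not taken from the original code):

    import numpy as np

    W = np.arange(12, dtype=np.float32).reshape(4, 3)   # 4 keys, embed_dim = 3
    X = np.array([[0, 2], [1, 3]], dtype=np.uint32)     # 2 inputs, n_gram = 2
    Y = np.hstack([W.take(X[:, i], axis=0) for i in range(X.shape[1])])
    print(Y.shape)   # (2, 6): two concatenated 3-d embeddings per row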
Example #29
 def init_params(self, w_scale=0.01):
     """Randomly initialize the weights in this layer."""
     self.params['W'] = w_scale * randn((self.key_count, self.embed_dim))
     self.grads['W'] = zeros((self.key_count, self.embed_dim))
     return
Example #30
 def init_params(self, w_scale=0.01):
     """Randomly initialize the weights in this layer."""
     self.params['W'] = w_scale * randn((self.key_count, self.embed_dim))
     self.grads['W'] = zeros((self.key_count, self.embed_dim))
     return