def initParams(self):
    """Initialize parameters and gradient buffers for the two-hidden-layer model.

    Builds the word-embedding matrix ``L``, the weight/bias pairs for two
    hidden layers, and the softmax layer; collects all parameters in
    ``self.stack``; and allocates matching (uninitialized) gradient buffers.

    Reads: ``self.wvecDim``, ``self.middleDim``, ``self.outputDim``.
    Sets:  ``self.L``, ``self.W1``/``b1``, ``self.W2``/``b2``,
           ``self.Ws``/``bs``, ``self.stack``, and the ``d*`` gradient buffers.
    """
    np.random.seed(12341)  # fixed seed so initialization is reproducible

    SCALE = 0.01  # small-random initialization scale for weight matrices

    # Word vectors from a pretrained loader, transposed so columns index words.
    # NOTE(review): load_word_vectors() presumably returns (num_words, word_dim),
    # making self.L (word_dim, num_words) after .T — confirm against the loader.
    self.L = load_word_vectors().T

    # Hidden activation weights for layer 1 (combines two child vectors)
    self.W1 = SCALE * np.random.randn(self.wvecDim, 2 * self.wvecDim)
    self.b1 = np.zeros(self.wvecDim)

    # Hidden activation weights for layer 2
    self.W2 = SCALE * np.random.randn(self.middleDim, self.wvecDim)
    self.b2 = np.zeros(self.middleDim)

    # Softmax weights — this is "U" in the notes and the handout; renamed
    # here deliberately to avoid a notation clash.
    self.Ws = SCALE * np.random.randn(self.outputDim, self.middleDim)
    self.bs = np.zeros(self.outputDim)

    self.stack = [self.L, self.W1, self.b1, self.W2, self.b2, self.Ws, self.bs]

    # Gradient buffers: np.empty leaves them uninitialized — they are
    # expected to be zeroed/overwritten by the backprop pass before use.
    self.dW1 = np.empty(self.W1.shape)
    self.db1 = np.empty(self.wvecDim)
    self.dW2 = np.empty(self.W2.shape)
    self.db2 = np.empty(self.middleDim)
    self.dWs = np.empty(self.Ws.shape)
    self.dbs = np.empty(self.outputDim)
def initParams(self):
    """Initialize parameters and gradient buffers for the single-hidden-layer model.

    Builds the word-embedding matrix ``L``, one hidden weight/bias pair, and
    the softmax layer; collects all parameters in ``self.stack``; and
    allocates matching (uninitialized) gradient buffers.

    Reads: ``self.wvecDim``, ``self.outputDim``.
    Sets:  ``self.L``, ``self.W``/``b``, ``self.Ws``/``bs``, ``self.stack``,
           and the ``d*`` gradient buffers.
    """
    np.random.seed(12341)  # fixed seed so initialization is reproducible

    MULT = 0.01  # small-random initialization scale for weight matrices

    # Word vectors from a pretrained loader, transposed so columns index words.
    # NOTE(review): load_word_vectors() presumably returns (num_words, word_dim),
    # making self.L (word_dim, num_words) after .T — confirm against the loader.
    self.L = load_word_vectors().T

    # Hidden layer parameters (combines two child vectors into one)
    self.W = MULT * np.random.randn(self.wvecDim, 2 * self.wvecDim)
    self.b = np.zeros(self.wvecDim)

    # Softmax weights — this is "U" in the notes and the handout
    self.Ws = MULT * np.random.randn(self.outputDim, self.wvecDim)
    self.bs = np.zeros(self.outputDim)

    self.stack = [self.L, self.W, self.b, self.Ws, self.bs]

    # Gradient buffers: np.empty leaves them uninitialized — they are
    # expected to be zeroed/overwritten by the backprop pass before use.
    self.dW = np.empty(self.W.shape)
    self.db = np.empty(self.wvecDim)
    self.dWs = np.empty(self.Ws.shape)
    self.dbs = np.empty(self.outputDim)