def _initialize_w_c_b(self, n_words, vocab, initial_embedding_dict):
    """Initializes the word weights `W`, context weights `C`, and the
    bias vectors `bw` and `bc`, optionally warm-starting `W` and `C`
    from `initial_embedding_dict`.

    Parameters
    ----------
    n_words : int
    vocab : Iterable
    initial_embedding_dict : dict

    """
    self.W = randmatrix(n_words, self.n)  # Word weights.
    self.C = randmatrix(n_words, self.n)  # Context weights.
    if initial_embedding_dict:
        assert self.n == len(next(iter(initial_embedding_dict.values())))
        self.original_embedding = np.zeros((len(vocab), self.n))
        self.has_embedding = np.zeros(len(vocab), dtype=bool)
        for i, w in enumerate(vocab):
            if w in initial_embedding_dict:
                self.has_embedding[i] = 1
                embedding = np.array(initial_embedding_dict[w])
                self.original_embedding[i] = embedding
                # Divide the original embedding into W and C,
                # plus some noise to break the symmetry that would
                # otherwise cause both gradient updates to be
                # identical.
                self.W[i] = 0.5 * embedding + noise(self.n)
                self.C[i] = 0.5 * embedding + noise(self.n)
        # This is for testing. It differs from
        # `self.original_embedding` only in that it includes the
        # random noise we added above to break the symmetry.
        self.G_start = self.W + self.C
    self.bw = randmatrix(n_words, 1)
    self.bc = randmatrix(n_words, 1)
    self.ones = np.ones((n_words, 1))
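
# Note: `randmatrix` and `noise` are used above but not defined in this
# section. The module-level sketches below are minimal implementations
# consistent with that usage; the uniform ranges and the noise scale are
# assumptions, not necessarily the values the rest of the codebase uses.

import numpy as np


def randmatrix(m, n, lower=-0.5, upper=0.5):
    """Creates an m x n matrix of values drawn uniformly from
    [lower, upper]. Used for random parameter initialization."""
    return np.random.uniform(low=lower, high=upper, size=(m, n))


def noise(n, scale=0.01):
    """Returns a small random vector of length n, used to break the
    symmetry between the W and C rows built from the same embedding."""
    return np.random.uniform(-scale, scale, size=n)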
def _build_graph(self, vocab, initial_embedding_dict):
    """Builds the computation graph.

    Parameters
    ----------
    vocab : Iterable
    initial_embedding_dict : dict

    """
    # Constants
    self.ones = tf.ones([self.n_words, 1])

    # Parameters:
    if initial_embedding_dict is None:
        # Ordinary GloVe
        self.W = self._weight_init(self.n_words, self.n, 'W')
        self.C = self._weight_init(self.n_words, self.n, 'C')
    else:
        # This is the case where we have values to use as a
        # "warm start":
        self.n = len(next(iter(initial_embedding_dict.values())))
        W = randmatrix(len(vocab), self.n)
        C = randmatrix(len(vocab), self.n)
        self.original_embedding = np.zeros((len(vocab), self.n))
        self.has_embedding = np.zeros(len(vocab))
        for i, w in enumerate(vocab):
            if w in initial_embedding_dict:
                self.has_embedding[i] = 1.0
                embedding = np.array(initial_embedding_dict[w])
                self.original_embedding[i] = embedding
                # Divide the original embedding into W and C,
                # plus some noise to break the symmetry that would
                # otherwise cause both gradient updates to be
                # identical.
                W[i] = 0.5 * embedding + noise(self.n)
                C[i] = 0.5 * embedding + noise(self.n)
        self.W = tf.Variable(W, name='W', dtype=tf.float32)
        self.C = tf.Variable(C, name='C', dtype=tf.float32)
        self.original_embedding = tf.constant(
            self.original_embedding, dtype=tf.float32)
        self.has_embedding = tf.constant(
            self.has_embedding, dtype=tf.float32)
        # This is for testing. It differs from
        # `self.original_embedding` only in that it includes the
        # random noise we added above to break the symmetry.
        self.G_start = W + C

    self.bw = self._weight_init(self.n_words, 1, 'bw')
    self.bc = self._weight_init(self.n_words, 1, 'bc')

    self.model = tf.tensordot(self.W, tf.transpose(self.C), axes=1) + \
        tf.tensordot(self.bw, tf.transpose(self.ones), axes=1) + \
        tf.tensordot(self.ones, tf.transpose(self.bc), axes=1)
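
# A minimal standalone check (NumPy only; the function name and sizes
# are illustrative, not part of the class) of what `self.model` above
# computes: cell (i, j) is the dot product of word vector i with context
# vector j plus both biases, i.e. GloVe's prediction for the (weighted
# log) co-occurrence count of words i and j.

def _demo_glove_prediction(n_words=4, n=3):
    rng = np.random.RandomState(0)
    W = rng.uniform(-0.5, 0.5, size=(n_words, n))
    C = rng.uniform(-0.5, 0.5, size=(n_words, n))
    bw = rng.uniform(-0.5, 0.5, size=(n_words, 1))
    bc = rng.uniform(-0.5, 0.5, size=(n_words, 1))
    ones = np.ones((n_words, 1))
    # Same algebra as the tensordot expression above:
    # model = W C^T + bw 1^T + 1 bc^T.
    model = W @ C.T + bw @ ones.T + ones @ bc.T
    i, j = 1, 2
    assert np.isclose(model[i, j], W[i] @ C[j] + bw[i, 0] + bc[j, 0])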