def get_updates(self, params, cost):
    grads = self.get_grads(params, cost)
    updates = OrderedDict()

    t_prev = shared(np.asarray(0., dtype=get_dtype()))
    # Using theano constant to prevent upcasting of float32
    one = tensor.constant(1)

    t = t_prev + 1
    # bias-corrected step size for the current time step
    a_t = self.learning_rate * tensor.sqrt(one - self.beta2 ** t) / (one - self.beta1 ** t)

    for param, g_t in zip(params, grads):
        value = param.get_value(borrow=True)
        m_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                        broadcastable=param.broadcastable)
        v_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                        broadcastable=param.broadcastable)

        # first and second moment estimates of the gradient
        m_t = self.beta1 * m_prev + (one - self.beta1) * g_t
        v_t = self.beta2 * v_prev + (one - self.beta2) * g_t ** 2
        step = a_t * m_t / (tensor.sqrt(v_t) + self.epsilon)

        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - step

    updates[t_prev] = t
    updates.update(self.get_lr_updates())
    return updates
def get_updates(self, params, cost):
    grads = self.get_grads(params, cost)
    t_prev = shared(np.asarray(0., dtype=get_dtype()))
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = tensor.constant(1)

    t = t_prev + 1
    a_t = self.learning_rate / (one - self.beta1 ** t)

    for param, g_t in zip(params, grads):
        value = param.get_value(borrow=True)
        m_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                        broadcastable=param.broadcastable)
        u_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                        broadcastable=param.broadcastable)

        m_t = self.beta1 * m_prev + (one - self.beta1) * g_t
        u_t = tensor.maximum(self.beta2 * u_prev, abs(g_t))
        step = a_t * m_t / (u_t + self.epsilon)

        updates[m_prev] = m_t
        updates[u_prev] = u_t
        updates[param] = param - step

    updates[t_prev] = t
    updates.update(self.get_lr_updates())
    return updates
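A minimal sketch of how an optimizer's `get_updates` is typically consumed, assuming an already-constructed optimizer instance (called `optimizer` below) that exposes the method above; the toy symbols `x`, `y`, `w`, and `cost` are illustrative and not part of the library.

import numpy as np
import theano
import theano.tensor as tensor

# toy linear-regression graph (illustrative only); get_dtype() is the
# library helper used throughout the snippets above
x = tensor.matrix('x')
y = tensor.vector('y')
w = theano.shared(np.zeros(3, dtype=get_dtype()), name='w')
cost = tensor.mean((tensor.dot(x, w) - y) ** 2)

# get_updates returns an OrderedDict mapping each shared variable to its
# new value; theano.function applies the whole dict after every call
updates = optimizer.get_updates([w], cost)
train_step = theano.function(inputs=[x, y], outputs=cost, updates=updates)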
def sample(self, size):
    flat_shape = (size[0], np.prod(size[1:]))
    a = get_rng().normal(loc=0., scale=1., size=flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the SVD factor whose shape matches the flattened weight shape
    q = u if u.shape == flat_shape else v
    value = q.reshape(size).astype(get_dtype())
    return value
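A quick sanity check for the orthogonal sampler above; `Orthogonal` is a placeholder for whatever initializer class owns this `sample` method.

import numpy as np

init = Orthogonal()             # hypothetical class name
w = init.sample((64, 128))      # flat_shape is (64, 128), so q is taken from v

# rows of the sampled matrix should be orthonormal: w . w^T ~ I
gram = np.dot(w, w.T)
print(np.allclose(gram, np.eye(64), atol=1e-4))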
def __init__(self, learning_rate=0.001, decay=0., clip_norm=0., max_norm=0.):
    self.learning_rate = shared(np.cast[get_dtype()](learning_rate))
    self.decay = decay
    self.clip_norm = clip_norm
    self.max_norm = max_norm
def sample(self, size):
    value = get_rng().normal(loc=0.0, scale=self.scale, size=size)
    value = value.astype(get_dtype())
    return value
def sample(self, size):
    value = get_rng().uniform(-self.scale, self.scale, size=size)
    value = value.astype(get_dtype())
    return value
def sample(self, size):
    value = np.ones(size, dtype=get_dtype())
    return value
def build(self):
    # load vocabulary
    with open(self.vocab_path, 'rb') as fin:
        vocabs_freqs = pickle.load(fin)
    threshold = 1 if self.threshold is None else self.threshold
    sorted_vocab = [word for word, freq in sorted(vocabs_freqs.items()) if freq >= threshold]
    sorted_vocab.append(_UNKNOWN)
    sorted_vocab.append(_ZERO)
    self.idx_to_vocab = {i: vocab for i, vocab in enumerate(sorted_vocab)}
    self.vocab_to_idx = {vocab: i for i, vocab in enumerate(sorted_vocab)}

    pkl_path = "./f_data/prefix-{}-thre-{}-valid-{}-test-{}-total-{}.pkl".format(
        self.prefix, self.threshold, self.valid_split, self.test_split, self.total_len)
    print("Building data ...")

    if os.path.exists(pkl_path):
        # reuse previously built data and splits
        with open(pkl_path, 'rb') as fin:
            self.all_xs, self.all_ys, \
                self._train_start, self._train_end, \
                self._valid_start, self._valid_end, \
                self._test_start, self._test_end = pickle.load(fin)
    else:
        # load xs in index
        all_xs = []
        remove_idxs = []
        with open(os.path.join(os.getcwd(), self.xs_path), encoding='utf-8') as fin:
            i = 0
            for line in fin:
                sentences = [sent.strip().split() for sent in line.strip().split("\t")]
                if len(sentences) != 2:
                    print("Not A Pair: {}".format(line))
                    remove_idxs.append(i)
                else:
                    all_xs.append(sentences)
                    i += 1
                if i == self.total_len:
                    break
        self.all_xs = all_xs

        # load ys
        with open(os.path.join(os.getcwd(), self.ys_path), 'rb') as fin:
            idx_all_ys = pickle.load(fin)
        if self.total_len > 0:
            idx_all_ys = idx_all_ys[:self.total_len]
        # pop from the back so earlier removals do not shift later indices
        for i in sorted(remove_idxs, reverse=True):
            idx_all_ys.pop(i)
        idx_all_ys = np.asarray(idx_all_ys, dtype='int32')
        # one-hot encode the binary labels
        self.all_ys = np.zeros((idx_all_ys.shape[0], 2), dtype=get_dtype())
        for i in range(2):
            self.all_ys[idx_all_ys == i, i] = 1

        if self.total_len == -1:
            self.total_len = len(all_xs)

        # shuffle data
        self.shuffle_data(self.all_xs, self.all_ys)

        # the start and end of the train, valid and test splits
        valid_len = int(self.total_len * self.valid_split)
        test_len = int(self.total_len * self.test_split)
        train_len = int(self.total_len * (1 - self.valid_split - self.test_split))
        self._train_start, self._train_end = 0, train_len
        self._valid_start, self._valid_end = train_len, train_len + valid_len
        self._test_start, self._test_end = train_len + valid_len, train_len + valid_len + test_len

        # pickle the built data so the next run can reuse it
        with open(pkl_path, 'wb') as fout:
            dump_contents = [self.all_xs, self.all_ys,
                             self._train_start, self._train_end,
                             self._valid_start, self._valid_end,
                             self._test_start, self._test_end]
            pickle.dump(dump_contents, fout)

    assert len(self.all_xs) == len(self.all_ys)
    self.index_to_tag = self.get_index_to_tag()
def build(self, **kwargs):
    assert self.comp_objective is not None
    assert self.comp_optimizer is not None

    # random seed
    if self.seed:
        set_seed(self.seed)

    # forward
    train_prob_ys, train_ys, train_loss = self._forward(True)
    if self.train_test_split:
        predict_prob_ys, predict_ys, predict_loss = self._forward(False)
    else:
        predict_prob_ys, predict_ys, predict_loss = train_prob_ys, train_ys, train_loss

    # regularizers
    regularizers = []
    for layer in self.comp_layers:
        regularizers.extend(layer.regularizers)
    regularizer_loss = tensor.cast(tensor.sum(regularizers), get_dtype())

    # total loss
    total_train_losses = regularizer_loss + train_loss

    # params
    params = []
    for layer in self.comp_layers:
        params += layer.params

    # layer updates
    layer_updates = OrderedDict()
    for layer in self.comp_layers:
        layer_updates.update(layer.updates)

    # model updates
    updates = self.comp_optimizer(params, total_train_losses)
    updates.update(layer_updates)

    # inputs
    if is_iterable(self.input_tensor):
        inputs = list(self.input_tensor) + [self.output_tensor]
    else:
        inputs = [self.input_tensor, self.output_tensor]

    # train functions
    train_outputs = [train_ys, ]
    for metric in self.train_metrics:
        if isinstance(metric, metrics.Regularizer):
            train_outputs.append(regularizer_loss)
        elif isinstance(metric, metrics.Loss):
            train_outputs.append(train_loss)
        elif isinstance(metric, metrics.TotalLoss):
            train_outputs.append(total_train_losses)
        else:
            train_outputs.append(metric(train_prob_ys, self.output_tensor))
    self.train_func_for_eval = function(inputs=inputs, outputs=train_outputs, updates=updates)

    # test functions
    test_outputs = [predict_ys, ]
    for metric in self.predict_metrics:
        if isinstance(metric, metrics.Loss):
            test_outputs.append(predict_loss)
        else:
            test_outputs.append(metric(predict_prob_ys, self.output_tensor))
    self.predict_func_for_eval = function(inputs=inputs, outputs=test_outputs)
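A rough usage sketch for the compiled functions built above, assuming a model instance `model` with this `build` method and batches shaped like the one-hot ys produced by the data-building code; `train_batches`, `valid_xs`, and `valid_ys` are placeholders supplied by the caller.

model.build()

for epoch in range(10):
    for batch_xs, batch_ys in train_batches:
        # first output is the predicted ys, the rest follow self.train_metrics
        train_ys, *train_metric_values = model.train_func_for_eval(batch_xs, batch_ys)

# first output is the predicted ys, the rest follow self.predict_metrics
predict_ys, *predict_metric_values = model.predict_func_for_eval(valid_xs, valid_ys)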