def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends.
    """
    r = []
    for i in xrange(self.N):
        batch_size, dim = self.rng.random_integers(2000, size=2)
        y_hat = self.rng.randn(batch_size, dim).astype(dtype=np.float32)
        y = self.rng.randn(batch_size, dim).astype(dtype=np.float32)

        quagga.processor_type = 'gpu'
        context = Context()
        y_hat_gpu = Connector(Matrix.from_npa(y_hat), context, context)
        y_gpu = Connector(Matrix.from_npa(y))
        sse_block = SseBlock(y_hat_gpu, y_gpu)
        sse_block.fprop()
        sse_block.bprop()
        dL_dy_hat_gpu = y_hat_gpu.backward_matrix.to_host()

        quagga.processor_type = 'cpu'
        context = Context()
        y_hat_cpu = Connector(Matrix.from_npa(y_hat), context, context)
        y_cpu = Connector(Matrix.from_npa(y))
        sse_block = SseBlock(y_hat_cpu, y_cpu)
        sse_block.fprop()
        sse_block.bprop()
        dL_dy_hat_cpu = y_hat_cpu.backward_matrix.to_host()

        r.append(np.allclose(dL_dy_hat_gpu, dL_dy_hat_cpu))

    self.assertEqual(sum(r), self.N)
def register_usage(self, fu_device_id, bo_device_id=None):
    """
    Register usage of the connector's `forward_matrix`.

    :param fu_device_id: id of the device on which `forward_matrix`
                         will be used
    :param bo_device_id: id of the device on which the connector's
                         `backward_matrix` will be calculated
    """
    # `is not None` matters here: device id 0 is falsy
    if not self.bpropagable and bo_device_id is not None:
        raise ValueError('Nobody is going to use the results of the backward '
                         'step. Do not register for backward propagation!')
    if fu_device_id != self._fo_device_id and fu_device_id not in self._f_matrices:
        self._f_matrices[fu_device_id] = Matrix.empty_like(self, fu_device_id)
        self.context[fu_device_id] = Context(fu_device_id)
    if bo_device_id is None:
        return self._f_matrices[fu_device_id]
    for device_id in [self._bu_device_id, bo_device_id]:
        if device_id not in self._b_matrices:
            self._b_matrices[device_id] = Matrix.empty_like(self, device_id)
        if device_id not in self.context:
            self.context[device_id] = Context(device_id)
    if self._bu_device_id != bo_device_id and self._bu_device_id not in self._b_matrices_pool:
        self._b_matrices_pool[self._bu_device_id] = Matrix.empty_like(
            self, self._bu_device_id)
    return self._f_matrices[fu_device_id], self._b_matrices[bo_device_id]
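# The pattern every consumer below follows, shown as a hypothetical helper
# for illustration only (`block`, `x`, which is a Connector, and `device_id`
# are assumptions, not library API): register once at construction time and
# keep the returned matrices.
def register_inputs(block, x, device_id):
    if x.bpropagable:
        # forward matrix to read from, backward matrix to accumulate dL/dx into
        block.x, block.dL_dx = x.register_usage(device_id, device_id)
    else:
        block.x = x.register_usage(device_id)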
def __init__(self, W, b, x, device_id=None):
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    if W.bpropagable:
        self.W, self.dL_dW = W.register_usage(device_id, device_id)
    else:
        self.W = W.register_usage(device_id)
    if b:
        if b.bpropagable:
            self.b, self.dL_db = b.register_usage(device_id, device_id)
            self.ones = Matrix.empty(x.nrows, 1, self.b.dtype, device_id)
            self.ones.sync_fill(1.0)
        else:
            self.b = b.register_usage(device_id)
    if x.bpropagable:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    else:
        self.x = x.register_usage(device_id)
    output = Matrix.empty(x.nrows, self.W.ncols, device_id=device_id)
    self.learning = hasattr(self, 'dL_dW') or hasattr(self, 'dL_db') or \
                    hasattr(self, 'dL_dx')
    if self.learning:
        self.b_context = Context(device_id)
        self.output = Connector(output, device_id)
    else:
        self.output = Connector(output)
def __init__(self, x, nonlinearity, device_id=None):
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    self.learning = x.bpropagable
    if self.learning:
        self.b_context = Context(device_id)
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
        self._df_dpref = Matrix.empty_like(self.x, device_id)
    else:
        self.x = x.register_usage(device_id)
    output = Matrix.empty_like(x, device_id)
    self.output = Connector(output, device_id if self.learning else None)
    if nonlinearity == 'sigmoid':
        self.f = self.x.sigmoid
    elif nonlinearity == 'tanh':
        self.f = self.x.tanh
    elif nonlinearity == 'relu':
        self.f = self.x.relu
    elif nonlinearity == 'softmax':
        raise ValueError('For softmax nonlinearity use SoftmaxBlock!')
    else:
        raise ValueError('Unknown nonlinearity: {}'.format(nonlinearity))
    self.training_mode = True
class PtbMiniBatchesGenerator(object):
    def __init__(self, ptb_train, ptb_valid, batch_size, sentence_max_len,
                 device_id):
        self.blocking_contexts = None
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.train_offsets = HomogeneousDataGenerator(ptb_train, batch_size,
                                                      sentence_max_len,
                                                      randomize=True,
                                                      infinite=True)
        self.valid_offsets = HomogeneousDataGenerator(ptb_valid, batch_size,
                                                      sentence_max_len)
        train_sentences = np.array([self.train_offsets.flatten_sentences])
        valid_sentences = np.array([self.valid_offsets.flatten_sentences])
        self.train_sents = Matrix.from_npa(train_sentences, 'int', device_id)
        self.valid_sents = Matrix.from_npa(valid_sentences, 'int', device_id)
        # the trailing [...] creates a view, so ._sent_lengths.base always
        # points at the full buffer (it is re-sliced in fprop)
        self._sent_lengths = np.empty((batch_size, 1), dtype=np.int32,
                                      order='F')[...]
        self.sent_lengths = Matrix.from_npa(self._sent_lengths,
                                            device_id=device_id)
        sentence_batch = Matrix.empty(batch_size, sentence_max_len, 'int',
                                      device_id)
        self.sentence_batch = Connector(sentence_batch, self.context)
        self.sentence_batch.sync_fill(0)
        self._mask = Matrix.empty(sentence_batch.nrows,
                                  self.sentence_batch.ncols, 'float',
                                  device_id)
        self.mask = List([Connector(self._mask[:, i])
                          for i in xrange(sentence_max_len)],
                         self.sentence_batch.ncols)
        self.train_offsets_iterator = iter(self.train_offsets)
        self.valid_offsets_iterator = iter(self.valid_offsets)
        self.training_mode = True

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False

    def fprop(self):
        if self.training_mode:
            offsets = next(self.train_offsets_iterator)
            sents = self.train_sents
        else:
            try:
                offsets = next(self.valid_offsets_iterator)
                sents = self.valid_sents
            except StopIteration:
                # the validation pass is finite: reset the iterator and
                # signal the end of the epoch to the caller
                self.valid_offsets_iterator = iter(self.valid_offsets)
                raise
        self.context.wait(*self.blocking_contexts)
        self._sent_lengths = self._sent_lengths.base[:len(offsets)]
        self.sentence_batch.nrows = len(offsets)
        for k, offset in enumerate(offsets):
            self.sentence_batch[k].assign(self.context,
                                          sents[:, offset[0]:offset[1]])
            self._sent_lengths[k] = offset[1] - offset[0]
        max_sent_len = int(np.max(self._sent_lengths))
        self.sentence_batch.last_modification_context = self.context
        self.sentence_batch.ncols = max_sent_len
        self.sent_lengths.assign_npa(self.context, self._sent_lengths)
        self._mask.mask_column_numbers_row_wise(self.context,
                                                self.sent_lengths)
        for e in self.mask:
            e.last_modification_context = self.context
        self.sentence_batch.fprop()
        self.mask.fprop()
def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends.
    """
    r = []
    for i in xrange(self.N):
        max_input_sequence_len = self.rng.random_integers(500)
        sequence_len = max_input_sequence_len if i == 0 else \
            self.rng.random_integers(max_input_sequence_len)
        batch_size = self.rng.random_integers(512)
        dim = self.rng.random_integers(1500)
        x = [self.rng.rand(batch_size, dim).astype(dtype=np.float32)
             for _ in xrange(max_input_sequence_len)]
        state = self.rng.get_state()

        quagga.processor_type = 'gpu'
        context = Context()
        x_gpu = List([Connector(Matrix.from_npa(e), context, context)
                      for e in x])
        smean_pooling_block_gpu = SequentialMeanPoolingBlock(x_gpu)
        x_gpu.set_length(sequence_len)
        _, dL_doutput = smean_pooling_block_gpu.output.register_usage(
            context, context)
        smean_pooling_block_gpu.fprop()
        random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
        Matrix.from_npa(random_matrix, 'float').copy_to(context, dL_doutput)
        smean_pooling_block_gpu.bprop()
        dL_dmatrices_gpu = [e.backward_matrix.to_host() for e in x_gpu]

        self.rng.set_state(state)
        quagga.processor_type = 'cpu'
        context = Context()
        x_cpu = List([Connector(Matrix.from_npa(e), context, context)
                      for e in x])
        smean_pooling_block_cpu = SequentialMeanPoolingBlock(x_cpu)
        x_cpu.set_length(sequence_len)
        _, dL_doutput = smean_pooling_block_cpu.output.register_usage(
            context, context)
        smean_pooling_block_cpu.fprop()
        random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
        Matrix.from_npa(random_matrix, 'float').copy_to(context, dL_doutput)
        smean_pooling_block_cpu.bprop()
        dL_dmatrices_cpu = [e.backward_matrix.to_host() for e in x_cpu]

        for dL_dmatrix_gpu, dL_dmatrix_cpu in izip(dL_dmatrices_gpu,
                                                   dL_dmatrices_cpu):
            if not np.allclose(dL_dmatrix_gpu, dL_dmatrix_cpu):
                r.append(False)
                break
        else:
            r.append(True)

    self.assertEqual(sum(r), self.N)
def __init__(self, dropout_prob, x, seed=42, device_id=None):
    self.dropout_prob = dropout_prob
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    self.generator = Matrix.get_random_generator(seed)
    if x.bpropagable:
        self.b_context = Context(device_id)
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    else:
        self.x = x.register_usage(device_id)
    self.output = Matrix.empty_like(self.x)
    self.output = Connector(self.output,
                            device_id if x.bpropagable else None)
    self.training_mode = True
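# The fprop/bprop pair is not shown here. A NumPy sketch of classic
# (non-inverted) dropout semantics that this block's attributes suggest:
# a Bernoulli mask at train time, scaling by the keep probability at test
# time. Whether the block actually rescales at test time is an assumption,
# and `dropout_reference` is a hypothetical name.
import numpy as np

def dropout_reference(x, dropout_prob, rng, training_mode=True):
    if training_mode:
        # zero each unit independently with probability dropout_prob
        mask = (rng.rand(*x.shape) >= dropout_prob).astype(x.dtype)
        return x * mask
    # test time: scale activations by the keep probability
    return x * (1.0 - dropout_prob)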
class DataBlock(object):
    def __init__(self, data, char_to_idx, batch_size, x_device_id,
                 y_device_id):
        self.data = HomogeneousDataIterator(data, char_to_idx, batch_size,
                                            True, True)
        self.data_iterator = iter(self.data)
        self.x_context = Context(x_device_id)
        self.y_context = Context(y_device_id)
        max_len = max(len(sub_line) for sub_line in data)
        print max_len
        self.x = Connector(Matrix.empty(batch_size, max_len - 1, 'int',
                                        x_device_id))
        self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
        self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)],
                      self.x.ncols)
        self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
        self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float',
                                  x_device_id)
        # _mask has max_len - 1 columns, so the per-step connectors must not
        # index beyond that (the original xrange(max_len) overran by one)
        self.mask = List([Connector(self._mask[:, i])
                          for i in xrange(max_len - 1)],
                         self.x.ncols)
        self.blocking_contexts = None

    def fprop(self):
        self.x_context.wait(*self.blocking_contexts)
        self.y_context.wait(*self.blocking_contexts)
        data = next(self.data_iterator)
        lengths_npa = np.array([[len(e) - 1] for e in data], np.int32,
                               order='F')
        max_len = int(np.max(lengths_npa))
        x_npa = np.zeros((len(data), max_len), np.int32, 'F')
        for k, e in enumerate(data):
            x_npa[k, :len(e) - 1] = e[:-1]
        self.x.assign_npa(self.x_context, x_npa)
        y_npa = np.zeros((len(data), max_len), np.int32, 'F')
        for k, e in enumerate(data):
            y_npa[k, :len(e) - 1] = e[1:]
        self._y.assign_npa(self.y_context, y_npa)
        for e in self.y:
            e.last_modification_context = self.y_context
        self.lengths.assign_npa(self.x_context, lengths_npa)
        self._mask.mask_column_numbers_row_wise(self.x_context, self.lengths)
        for e in self.mask:
            e.last_modification_context = self.x_context
        self.x.fprop()
        self.y.fprop()
        self.mask.fprop()
def __init__(self, train_data, valid_data, batch_size, word_dropout_prob,
             device_id):
    self.train_data = HomogeneousDataIterator(train_data, batch_size,
                                              randomize=True, infinite=True)
    self.valid_data = HomogeneousDataIterator(valid_data, batch_size)
    self.train_data_iterator = iter(self.train_data)
    self.valid_data_iterator = iter(self.valid_data)
    self.word_keep_prob = 1.0 - word_dropout_prob
    self.rnd = RandomState(47571)
    # `word_to_idx` is expected to be defined in the enclosing scope
    self.unk_idx = word_to_idx['<UNK>']
    self.context = Context(device_id)
    c = Counter([len(line) for line in chain(train_data, valid_data)])
    print c.most_common()
    max_len = max(len(line) for line in chain(train_data, valid_data))
    self.enc_x = Connector(Matrix.empty(batch_size, max_len, 'int',
                                        device_id))
    self.enc_lengths = Matrix.empty(self.enc_x.nrows, 1, 'int', device_id)
    self._enc_mask = Matrix.empty(self.enc_x.nrows, self.enc_x.ncols,
                                  'float', device_id)
    self.enc_mask = List([Connector(self._enc_mask[:, i])
                          for i in xrange(max_len)],
                         self.enc_x.ncols)
    self.dec_x = Connector(Matrix.empty(batch_size, max_len + 1, 'int',
                                        device_id))
    self._dec_y = Matrix.empty(batch_size, max_len + 1, 'int', device_id)
    self.dec_y = List([Connector(self._dec_y[:, i])
                       for i in xrange(max_len + 1)],
                      self._dec_y.ncols)
    self.dec_lengths = Matrix.empty(self.dec_x.nrows, 1, 'int', device_id)
    self._dec_mask = Matrix.empty(self.dec_x.nrows, self.dec_x.ncols,
                                  'float', device_id)
    self.dec_mask = List([Connector(self._dec_mask[:, i])
                          for i in xrange(max_len + 1)],
                         self.dec_x.ncols)
    self.blocking_contexts = None
    self.training_mode = True
def test_bprop_vector(self):
    r = []
    for _ in xrange(self.N):
        embd_dim = self.rng.random_integers(10000)
        batch_size, output_dim = self.rng.random_integers(2000, size=2)
        W = self.get_orthogonal_matrix(embd_dim, output_dim)
        row_idxs = self.rng.randint(embd_dim,
                                    size=(batch_size, 1)).astype(np.int32)
        true_labels = self.rng.randint(output_dim,
                                       size=(batch_size, 1)).astype(np.int32)
        device_id = 0

        output = {}
        for processor_type in ['gpu', 'cpu']:
            quagga.processor_type = processor_type
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            qW = Connector(Matrix.from_npa(W), device_id)
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
            qW.fprop()
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            row_slicing_block.bprop()
            qW.add(Context(), qW.backward_matrix)
            output[processor_type] = qW.to_host()

        r.append(np.allclose(output['gpu'], output['cpu']))

    self.assertEqual(sum(r), len(r))
def bprop(self):
    if not self.bpropagable:
        raise ValueError('Nobody was going to use the results of the '
                         'backward step. You should not backward propagate!')
    if not self._b_matrices and not self._b_sparse_matrix:
        # nobody registered to provide derivatives, so a zero-filled dense
        # matrix is returned
        bwd = Matrix.empty_like(self, self._bu_device_id)
        if self._bu_device_id not in self.context:
            self.context[self._bu_device_id] = Context(self._bu_device_id)
        bwd.fill(self.context[self._bu_device_id], 0.0)
        self._b_matrices[self._bu_device_id] = bwd
        return bwd
    if not self._b_matrices and self._b_sparse_matrix:
        return self._b_sparse_matrix
    for bo_device_id, bwd_matrix in self._b_matrices.iteritems():
        if self._bu_device_id != bo_device_id:
            # copy the derivative from the remote device into the pool
            # matrix, then accumulate it
            self._b_matrices_pool[self._bu_device_id].assign(
                self.context[self._bu_device_id], bwd_matrix)
            self._b_matrices[self._bu_device_id].add(
                self.context[self._bu_device_id],
                self._b_matrices_pool[self._bu_device_id])
    if self._b_sparse_matrix:
        self._b_matrices[self._bu_device_id].add(
            self.context[self._bu_device_id], self._b_sparse_matrix)
    return self._b_matrices[self._bu_device_id]
def __init__(self, kkk, parameters, learning_rate_policy,
             beta1=0.9, beta2=0.999, epsilon=1e-20):
    self.kkk = kkk
    self.parameters = parameters
    # first (m) and second (v) moment estimates, one pair per parameter
    self.m = []
    self.v = []
    self.contexts = []
    for p in self.parameters:
        m = Matrix.empty_like(p)
        m.sync_fill(0.0)
        self.m.append(m)
        v = Matrix.empty_like(p)
        v.sync_fill(0.0)
        self.v.append(v)
        self.contexts.append(Context(p.device_id))
    self.learning_rate_policy = learning_rate_policy
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.blocking_contexts = []
    self.iteration = 0
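# The update step itself is not shown here. For reference, the standard Adam
# rule these buffers support looks like the NumPy sketch below (hypothetical;
# the names `grad`, `lr`, and `adam_step` are assumptions, not part of the
# class above).
import numpy as np

def adam_step(p, grad, m, v, lr, t, beta1=0.9, beta2=0.999, epsilon=1e-20):
    # exponential moving averages of the gradient and its elementwise square
    m[:] = beta1 * m + (1.0 - beta1) * grad
    v[:] = beta2 * v + (1.0 - beta2) * grad ** 2
    # bias correction for the zero initialization of m and v
    m_hat = m / (1.0 - beta1 ** t)
    v_hat = v / (1.0 - beta2 ** t)
    p -= lr * m_hat / (np.sqrt(v_hat) + epsilon)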
def test_theano_grad(self):
    quagga.processor_type = 'gpu'
    r = []
    for i in xrange(self.N):
        batch_size, dim = self.rng.random_integers(2000, size=2)
        y_hat = self.rng.randn(batch_size, dim).astype(dtype=np.float32)
        y = self.rng.randn(batch_size, dim).astype(dtype=np.float32)

        # Theano model
        th_y_hat, th_y = T.fmatrix(), T.fmatrix()
        loss = T.mean(T.sum((th_y_hat - th_y) ** 2, axis=1))
        get_theano_grads = theano.function([th_y_hat, th_y],
                                           T.grad(loss, wrt=th_y_hat))
        th_dL_dy_hat = get_theano_grads(y_hat, y)

        # quagga model
        context = Context()
        y_hat_gpu = Connector(Matrix.from_npa(y_hat), context, context)
        y_gpu = Connector(Matrix.from_npa(y))
        sse_block = SseBlock(y_hat_gpu, y_gpu)
        sse_block.fprop()
        sse_block.bprop()
        q_dL_dy_hat = y_hat_gpu.backward_matrix.to_host()

        r.append(np.allclose(th_dL_dy_hat, q_dL_dy_hat))

    self.assertEqual(sum(r), self.N)
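# The Theano gradient above also has a closed form: for
# L = mean_over_batch(sum((y_hat - y)**2, axis=1)) the derivative is
# dL/dy_hat = 2 * (y_hat - y) / batch_size. A self-contained NumPy check
# (`sse_grad_reference` is a hypothetical name, not library API):
import numpy as np

def sse_grad_reference(y_hat, y):
    # gradient of the mean-of-row-sums squared error w.r.t. y_hat
    return 2.0 * (y_hat - y) / y_hat.shape[0]

# e.g. np.allclose(sse_grad_reference(y_hat, y), th_dL_dy_hat) should hold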
def __init__(self, matrix, axis=1, device_id=None):
    self.context = Context(device_id)
    self._ctype = matrix.c_dtype
    self._zero = self._ctype(0.0)
    if axis == 0:
        self._ones = Matrix.empty(1, matrix.nrows, matrix.dtype, device_id)
        self.output = Matrix.empty(1, matrix.ncols, matrix.dtype, device_id)
        self.alpha = self._ctype(1.0 / matrix.nrows)
    elif axis == 1:
        self._ones = Matrix.empty(matrix.ncols, 1, matrix.dtype, device_id)
        self.output = Matrix.empty(matrix.nrows, 1, matrix.dtype, device_id)
        self.alpha = None
    else:
        raise ValueError('Invalid axis!')
    self._ones.sync_fill(1.0)
    self.axis = axis
    if matrix.bpropagable:
        self.matrix, self.dL_dmatrix = matrix.register_usage(self.context,
                                                             self.context)
        self.output = Connector(self.output, self.context, self.context)
    else:
        self.matrix = matrix.register_usage(self.context)
        self.output = Connector(self.output, self.context)
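# The `ones` vector turns the mean into a single matrix product: for axis=0
# the forward pass amounts to output = alpha * ones * matrix with
# alpha = 1 / nrows, i.e. a column-wise mean. A NumPy sketch of the
# equivalence (illustrative only):
import numpy as np

m = np.random.rand(4, 3).astype(np.float32)
ones = np.ones((1, m.shape[0]), dtype=np.float32)
alpha = 1.0 / m.shape[0]
assert np.allclose(alpha * ones.dot(m), m.mean(axis=0, keepdims=True))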
def __init__(self, x, scale_factor=1.0):
    self.context = Context(x.device_id)
    device_id = self.context.device_id
    self.output = x
    if x.bpropagable:
        _, self.dL_dx = x.register_usage(device_id, device_id)
    self.scale_factor = ct.c_float(-scale_factor)
def __init__(self, x, regularization_value):
    self.context = Context(x.device_id)
    device_id = self.context.device_id
    if x.bpropagable:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    else:
        self.x = x.register_usage(device_id)
    self.reg_value = ct.c_float(2 * regularization_value)
def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends.
    """
    r = []
    for i in xrange(self.N):
        batch_size, x_dim = self.rng.random_integers(3000, size=2)
        x = self.rng.rand(batch_size, x_dim).astype(np.float32)
        device_id = 0

        for nonlinearity in ['sigmoid', 'tanh', 'relu']:
            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            x_gpu = Connector(Matrix.from_npa(x), device_id)
            nonlinearity_block = NonlinearityBlock(x_gpu, nonlinearity)
            x_gpu.fprop()
            nonlinearity_block.fprop()
            _, dL_doutput = nonlinearity_block.output.register_usage(
                device_id, device_id)
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            dL_doutput.assign(Context(),
                              Matrix.from_npa(random_matrix, 'float'))
            nonlinearity_block.bprop()
            dL_dx_gpu = x_gpu.backward_matrix.to_host()

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            x_cpu = Connector(Matrix.from_npa(x), device_id)
            nonlinearity_block = NonlinearityBlock(x_cpu, nonlinearity)
            x_cpu.fprop()
            nonlinearity_block.fprop()
            _, dL_doutput = nonlinearity_block.output.register_usage(
                device_id, device_id)
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            dL_doutput.assign(Context(),
                              Matrix.from_npa(random_matrix, 'float'))
            nonlinearity_block.bprop()
            dL_dx_cpu = x_cpu.backward_matrix.to_host()

            r.append(np.allclose(dL_dx_gpu, dL_dx_cpu))

    self.assertEqual(sum(r), len(r))
def __init__(self, x, axis, device_id=None):
    if axis != 1:
        raise NotImplementedError('Only axis=1 is supported!')
    self.axis = axis
    self.context = Context(device_id)
    device_id = self.context.device_id
    self.x = x.register_usage(device_id)
    self.output = Connector(Matrix.empty(x.nrows, 1, x.dtype, device_id))
def __init__(self, y_hat, y, device_id=None):
    if y_hat.nrows != y.nrows or y_hat.ncols != y.ncols:
        raise ValueError('`y_hat` and `y` must have the same shape!')
    self.context = Context(device_id)
    if y_hat.bpropagable:
        self.y_hat, self.dL_dy_hat = y_hat.register_usage(self.context,
                                                          self.context)
    else:
        self.y_hat = y_hat.register_usage(self.context)
    self.y = y.register_usage(self.context)
def __init__(self, probs, true_labels, schedule, seed, device_id=None):
    self.schedule = schedule
    self.rnd = np.random.RandomState(seed)
    self.context = Context(device_id)
    device_id = self.context.device_id
    self.probs = probs.register_usage(device_id)
    self.true_labels = true_labels.register_usage(device_id)
    self.output = Connector(Matrix.empty_like(self.true_labels))
def __init__(self, W, col_indexes):
    device_id = W.device_id
    self.context = Context(device_id)
    learning = W.bpropagable
    if learning:
        self.W, self.dL_dW = W.register_usage_with_sparse_backward_matrix()
    else:
        self.W = W.register_usage(device_id)
    self.col_indexes = col_indexes.register_usage(device_id)
    output = Matrix.empty(W.nrows, col_indexes.ncols, device_id=device_id)
    self.output = Connector(output, device_id if learning else None)
def __init__(self, x_sequence, y_sequence, device_id=None):
    # TODO: add during hsplit, otherwise gradients are accumulated incorrectly
    if all(e.bpropagable for e in chain(x_sequence, y_sequence)):
        learning = True
    elif all(not e.bpropagable for e in chain(x_sequence, y_sequence)):
        learning = False
    else:
        raise ValueError('All elements must be bpropagable or '
                         'non-bpropagable. Mixed state is not allowed!')
    x_ncols = x_sequence[0].ncols
    y_ncols = y_sequence[0].ncols
    dtype = x_sequence[0].dtype
    for x, y in izip(x_sequence, y_sequence):
        if x.ncols != x_ncols or y.ncols != y_ncols:
            raise ValueError('All matrices in the sequence must have the '
                             'same number of columns!')
        if x.nrows != y.nrows:
            raise ValueError("Can't stack matrices in a sequence with "
                             "different numbers of rows!")
        if x.dtype != dtype or y.dtype != dtype:
            raise ValueError("Can't stack matrices with different dtypes!")
    self.context = Context(device_id)
    device_id = self.context.device_id
    if learning:
        self.x_sequence, self.dL_dx_sequences = izip(
            *x_sequence.register_usage(device_id, device_id))
        self.y_sequence, self.dL_dy_sequences = izip(
            *y_sequence.register_usage(device_id, device_id))
        self.dL_dx_sequences = List(self.dL_dx_sequences, x_sequence.length)
        self.dL_dy_sequences = List(self.dL_dy_sequences, y_sequence.length)
    else:
        self.x_sequence = x_sequence.register_usage(device_id)
        self.y_sequence = y_sequence.register_usage(device_id)
    self.x_sequence = List(self.x_sequence, x_sequence.length)
    self.y_sequence = List(self.y_sequence, y_sequence.length)
    output = []
    for _ in xrange(x_sequence.length):
        matrix = Matrix.empty(x_sequence[0].nrows, x_ncols + y_ncols,
                              dtype, device_id)
        output.append(Connector(matrix, device_id))
    self.output = List(output, x_sequence.length)
def __init__(self, parameters, learning_rate_policy, momentum_policy):
    self.parameters = parameters
    self.velocity = []
    for p in self.parameters:
        v = Matrix.empty_like(p)
        v.sync_fill(0.0)
        self.velocity.append(v)
    self.learning_rate_policy = learning_rate_policy
    self.momentum_policy = momentum_policy
    self.contexts = [Context(p.device_id) for p in parameters]
    self.blocking_contexts = []
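# The update method is not shown. The velocity buffers above support the
# classical momentum rule; a NumPy sketch where `grad`, `lr`, and `mu` stand
# in for the gradient and the values produced by the two policies (all names
# here are assumptions, not part of the class):
def momentum_step(p, grad, v, lr, mu):
    v *= mu           # decay the old velocity
    v -= lr * grad    # descend along the current gradient
    p += v            # apply the accumulated step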
def __init__(self, x):
    device_id = x[0].device_id
    learning = x[0].bpropagable
    self.context = Context(device_id)
    self.output = Matrix.empty_like(x[0])
    self.output = Connector(self.output, device_id if learning else None)
    if learning:
        self.x, self.dL_dx = izip(*x.register_usage(device_id, device_id))
    else:
        self.x = x.register_usage(device_id)
    self.last_idx = x.length - 1
def __init__(self, matrices, device_id=None):
    self.context = Context(device_id)
    device_id = self.context.device_id
    self.output = Matrix.empty_like(matrices[0], device_id)
    learning = matrices[0].bpropagable
    self.output = Connector(self.output, device_id if learning else None)
    if learning:
        self.matrices, self.dL_dmatrices = izip(
            *matrices.register_usage(device_id, device_id))
    else:
        self.matrices = matrices.register_usage(device_id)
    self.length = matrices.length
def __init__(self, x, true_labels, mask=None, device_id=None):
    self.context = Context(device_id)
    device_id = self.context.device_id
    if x.bpropagable:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    else:
        self.x = x.register_usage(device_id)
    self.true_labels = true_labels.register_usage(device_id)
    if mask:
        self.mask = mask.register_usage(device_id)
    self.probs = Connector(Matrix.empty_like(self.x))
    self.loss = None
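# Reference semantics for the block above (a NumPy sketch, not the library's
# kernels): row-wise softmax probabilities and the standard cross-entropy
# gradient `probs - onehot` w.r.t. the pre-softmax input. Any 1/batch_size
# scaling is a library convention and is omitted here;
# `softmax_ce_reference` is a hypothetical name.
import numpy as np

def softmax_ce_reference(x, true_labels):
    # numerically stable row-wise softmax
    e = np.exp(x - x.max(axis=1, keepdims=True))
    probs = e / e.sum(axis=1, keepdims=True)
    rows = np.arange(x.shape[0])
    labels = true_labels.ravel()
    # cross-entropy loss per row and its gradient w.r.t. x
    loss = -np.log(probs[rows, labels])
    dL_dx = probs.copy()
    dL_dx[rows, labels] -= 1.0
    return probs, loss, dL_dx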
def __init__(self, f_matrix, bu_device_id=None):
    self._fo_device_id = f_matrix.device_id
    self._f_matrices = {self._fo_device_id: f_matrix}
    self.context = {self._fo_device_id: Context(self._fo_device_id)}
    if bu_device_id is not None:
        self._bu_device_id = bu_device_id
        self._b_matrices = dict()
        self._b_matrices_pool = dict()
        self._b_sparse_matrix = None
    # We need this trick because otherwise, when setting such an attribute
    # later, we would add it to the Connector instance itself instead of
    # setting the attribute in f_matrix.
    self.__f_matrix_setable_attributes = f_matrix.get_setable_attributes()
    for attr_name in self.__f_matrix_setable_attributes:
        getattr(self, attr_name)
def __init__(self, parameters, learning_rate_policy, ema_decay=0.9,
             epsilon=1e-6):
    self.parameters = parameters
    # exponential moving average of squared gradients, one per parameter
    self.grad_sqr = []
    for p in self.parameters:
        grad_sqr = Matrix.empty_like(p)
        grad_sqr.sync_fill(0.0)
        self.grad_sqr.append(grad_sqr)
    self.learning_rate_policy = learning_rate_policy
    self.ema_decay = ema_decay
    self.epsilon = epsilon
    self.contexts = [Context(p.device_id) for p in parameters]
    self.blocking_contexts = []
class LstmBlock(object):
    """
    A long short-term memory (LSTM) block.

    Parameters
    ----------
    W : input-to-hidden weights for the z, i, f, o gates, stacked column-wise
    R : hidden-to-hidden (recurrent) weights for the z, i, f, o gates
    b : bias
    grad_clipping : threshold for clipping the pre-nonlinearity gradients
    x : input for the current time step
    mask : optional mask for variable-length sequences
    prev_c : cell state from the previous time step
    prev_h : hidden state from the previous time step
    device_id : int
        Defines the device's id on which the computation will take place
    """
    def __init__(self, W, R, b, grad_clipping, x, mask, prev_c, prev_h,
                 device_id=None):
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        if W.bpropagable:
            self.W, self.dL_dW = W.register_usage(device_id, device_id)
            self.W_b_context = Context(device_id)
        else:
            self.W = W.register_usage(device_id)
        if R.bpropagable:
            self.R, self.dL_dR = R.register_usage(device_id, device_id)
            self.R_b_context = Context(device_id)
        else:
            self.R = R.register_usage(device_id)
        if b.bpropagable:
            self.b, self.dL_db = b.register_usage(device_id, device_id)
            self.b_b_context = Context(device_id)
        else:
            self.b = b.register_usage(device_id)
        self.grad_clipping = grad_clipping
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
            self.x_b_context = Context(device_id)
        else:
            self.x = x.register_usage(device_id)
        if mask:
            self.mask = mask.register_usage(device_id)
        if prev_c.bpropagable:
            self.prev_c, self.dL_dprev_c = prev_c.register_usage(device_id,
                                                                 device_id)
            self.prev_c_b_context = Context(device_id)
        else:
            self.prev_c = prev_c.register_usage(device_id)
        if prev_h.bpropagable:
            self.prev_h, self.dL_dprev_h = prev_h.register_usage(device_id,
                                                                 device_id)
            self.prev_h_b_context = Context(device_id)
        else:
            self.prev_h = prev_h.register_usage(device_id)
        self.learning = W.bpropagable or R.bpropagable or x.bpropagable or \
                        prev_c.bpropagable or prev_h.bpropagable
        if self.learning:
            self.b_context = Context(device_id)

        dim = self.R.nrows
        batch_size = self.x.nrows

        self.zifo = Matrix.empty(batch_size, 4 * dim, device_id=device_id)
        self.z = self.zifo[:, 0*dim:1*dim]
        self.i = self.zifo[:, 1*dim:2*dim]
        self.f = self.zifo[:, 2*dim:3*dim]
        self.o = self.zifo[:, 3*dim:4*dim]
        self.c = Matrix.empty_like(self.prev_c, device_id)
        self.c = Connector(self.c, device_id if self.learning else None)
        self.tanh_c = Matrix.empty_like(self.c, device_id)
        self.h = Matrix.empty_like(self.c, device_id)
        self.h = Connector(self.h, device_id if self.learning else None)

        if self.learning:
            self._dzifo_dpre_zifo = Matrix.empty_like(self.zifo)
            self.dz_dpre_z = self._dzifo_dpre_zifo[:, 0*dim:1*dim]
            self.di_dpre_i = self._dzifo_dpre_zifo[:, 1*dim:2*dim]
            self.df_dpre_f = self._dzifo_dpre_zifo[:, 2*dim:3*dim]
            self.do_dpre_o = self._dzifo_dpre_zifo[:, 3*dim:4*dim]
            self.dL_dpre_zifo = self._dzifo_dpre_zifo
            self.dL_dpre_z = self.dz_dpre_z
            self.dL_dpre_i = self.di_dpre_i
            self.dL_dpre_f = self.df_dpre_f
            self.dL_dpre_o = self.do_dpre_o
            self._dtanh_c_dc = Matrix.empty_like(self.c)

    @property
    def dzifo_dpre_zifo(self):
        if self.learning:
            return self._dzifo_dpre_zifo

    @property
    def dtanh_c_dc(self):
        if self.learning:
            return self._dtanh_c_dc

    def fprop(self):
        # zifo = tanh_sigm(x[t] * W + h[t-1] * R + b)
        self.zifo.assign_dot(self.f_context, self.x, self.W)
        self.zifo.add_dot(self.f_context, self.prev_h, self.R)
        self.zifo.add(self.f_context, self.b)
        self.zifo.tanh_sigm(self.f_context, self.zifo,
                            self.dzifo_dpre_zifo, axis=1)
        # c[t] = i[t] .* z[t] + f[t] .* c[t-1]
        # h[t] = o[t] .* tanh(c[t])
        self.c.assign_sum_hprod(self.f_context, self.i, self.z, self.f,
                                self.prev_c)
        self.c.tanh(self.f_context, self.tanh_c, self.dtanh_c_dc)
        self.h.assign_hprod(self.f_context, self.o, self.tanh_c)
        if hasattr(self, 'mask'):
            # s[t] = mask .* s[t] + (1 - mask) .* s[t-1]
            self.c.assign_masked_addition(self.f_context, self.mask,
                                          self.c, self.prev_c)
            self.h.assign_masked_addition(self.f_context, self.mask,
                                          self.h, self.prev_h)
        self.c.fprop()
        self.h.fprop()

    def bprop(self):
        dL_dc = self.c.backward_matrix
        dL_dh = self.h.backward_matrix
        if hasattr(self, 'mask'):
            # dL/ds[t-1] = (1 - mask) .* dL/ds[t]
            # dL/ds[t] = mask .* dL/ds[t]
            if hasattr(self, 'dL_dprev_c'):
                self.dL_dprev_c.add_hprod_one_minus_mask(
                    self.prev_c_b_context, self.mask, dL_dc)
            dL_dc.hprod(self.prev_c_b_context, self.mask)
            if hasattr(self, 'dL_dprev_h'):
                self.dL_dprev_h.add_hprod_one_minus_mask(
                    self.prev_h_b_context, self.mask, dL_dh)
            dL_dh.hprod(self.prev_h_b_context, self.mask)
        # dL/dc[t] = dL[t+1]/dc[t] + dL/dh[t] .* o[t] .* dtanh(c[t])/dc[t]
        dL_dc.add_hprod(self.b_context, dL_dh, self.o, self.dtanh_c_dc)
        # self.dzifo_dpre_zifo was calculated in self.f_context; we have to
        # wait for it explicitly in self.b_context, because
        # self._dzifo_dpre_zifo does not have a proper
        # last_modification_context
        self.b_context.wait(self.f_context)
        # dL/dpre_o[t] = dL/dh[t] .* tanh(c[t]) .* do[t]/dpre_o[t]
        # dL/dpre_f[t] = dL/dc[t] .* c[t-1] .* df[t]/dpre_f[t]
        # dL/dpre_i[t] = dL/dc[t] .* z[t] .* di[t]/dpre_i[t]
        # dL/dpre_z[t] = dL/dc[t] .* i[t] .* dz[t]/dpre_z[t]
        self.dL_dpre_o.assign_hprod(self.b_context, dL_dh, self.tanh_c,
                                    self.do_dpre_o)
        self.dL_dpre_f.assign_hprod(self.b_context, dL_dc, self.prev_c,
                                    self.df_dpre_f)
        self.dL_dpre_i.assign_hprod(self.b_context, dL_dc, self.z,
                                    self.di_dpre_i)
        self.dL_dpre_z.assign_hprod(self.b_context, dL_dc, self.i,
                                    self.dz_dpre_z)
        if self.grad_clipping:
            self.dL_dpre_zifo.clip(self.b_context, -self.grad_clipping,
                                   self.grad_clipping)
        else:
            self.dL_dpre_zifo.last_modif_context = self.b_context
        if hasattr(self, 'dL_dW'):
            # dL_dW += x[t].T * dL/dpre_zifo[t]
            self.dL_dW.add_dot(self.W_b_context, self.x, self.dL_dpre_zifo,
                               'T')
        if hasattr(self, 'dL_dR'):
            # dL_dR += h[t-1].T * dL/dpre_zifo[t]
            self.dL_dR.add_dot(self.R_b_context, self.prev_h,
                               self.dL_dpre_zifo, 'T')
        if hasattr(self, 'dL_db'):
            # dL_db += sum(dL/dpre_zifo[t], axis=0)
            self.dL_db.add_repeat_derivative(self.b_b_context,
                                             self.dL_dpre_zifo,
                                             self.dL_dpre_zifo.nrows, axis=0)
        if hasattr(self, 'dL_dx'):
            # dL/dx[t] = dL/dpre_zifo[t] * W.T
            self.dL_dx.add_dot(self.x_b_context, self.dL_dpre_zifo, self.W,
                               'N', 'T')
        if hasattr(self, 'dL_dprev_c'):
            # dL/dc[t-1] = f[t] .* dL/dc[t]
            self.dL_dprev_c.add_hprod(self.prev_c_b_context, self.f, dL_dc)
        if hasattr(self, 'dL_dprev_h'):
            # dL/dh[t-1] = dL/dpre_zifo[t] * R.T
            self.dL_dprev_h.add_dot(self.prev_h_b_context,
                                    self.dL_dpre_zifo, self.R, 'N', 'T')
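# Reference forward step in NumPy (a sketch, not the block's kernels;
# `lstm_fprop_reference` is a hypothetical name): the fused tanh_sigm call
# applies tanh to the z quarter of the pre-activation and the logistic
# sigmoid to the i, f, o quarters.
import numpy as np

def lstm_fprop_reference(x, prev_h, prev_c, W, R, b):
    pre = x.dot(W) + prev_h.dot(R) + b                   # (batch, 4*dim)
    dim = R.shape[0]
    z = np.tanh(pre[:, 0*dim:1*dim])                     # block input
    i = 1.0 / (1.0 + np.exp(-pre[:, 1*dim:2*dim]))       # input gate
    f = 1.0 / (1.0 + np.exp(-pre[:, 2*dim:3*dim]))       # forget gate
    o = 1.0 / (1.0 + np.exp(-pre[:, 3*dim:4*dim]))       # output gate
    c = i * z + f * prev_c                               # new cell state
    h = o * np.tanh(c)                                   # new hidden state
    return c, h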
class MnistMiniBatchesGenerator(object):
    def __init__(self, train_x, train_y, valid_x, valid_y, batch_size,
                 device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.train_x = Matrix.from_npa(train_x.T.astype(np.float32),
                                       device_id=device_id)
        self.valid_x = Matrix.from_npa(valid_x.T.astype(np.float32),
                                       device_id=device_id)
        self.train_y = Matrix.from_npa(train_y[:, np.newaxis], 'int',
                                       device_id=device_id)
        self.valid_y = Matrix.from_npa(valid_y[:, np.newaxis], 'int',
                                       device_id=device_id)
        self.batch_size = batch_size
        x = Matrix.empty(self.batch_size, self.train_x.nrows,
                         device_id=device_id)
        y = Matrix.empty(self.batch_size, 1, 'int', device_id)
        self.x = Connector(x)
        self.y = Connector(y)
        self.train_indices = np.arange(int(self.train_x.ncols),
                                       dtype=np.int32)
        self.valid_indices = np.arange(int(self.valid_x.ncols),
                                       dtype=np.int32)
        self.indices = Matrix.empty(self.batch_size, 1, 'int', device_id)
        self.rng = np.random.RandomState(42)
        self.rng.shuffle(self.train_indices)
        self.train_i = 0
        self.valid_i = 0
        self.training_mode = True
        self.blocking_contexts = None

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False

    def fprop(self):
        indices = self.train_indices if self.training_mode else \
            self.valid_indices
        i = self.train_i if self.training_mode else self.valid_i
        x = self.train_x if self.training_mode else self.valid_x
        y = self.train_y if self.training_mode else self.valid_y
        indices = indices[self.batch_size * i:self.batch_size * (i + 1)]
        indices = np.asfortranarray(indices[:, np.newaxis])
        if self.training_mode:
            self.train_i += 1
        else:
            self.valid_i += 1
        if indices.size:
            self.indices.assign_npa(self.context, indices)
            self.x.nrows = indices.size
            self.y.nrows = indices.size
            self.context.wait(*self.blocking_contexts)
            x.slice_columns_and_transpose(self.context, self.indices, self.x)
            y.slice_rows(self.context, self.indices, self.y)
            self.x.fprop()
            self.y.fprop()
        else:
            if self.training_mode:
                # epoch finished: reshuffle and start over
                self.train_i = 0
                self.rng.shuffle(self.train_indices)
                self.fprop()
            else:
                self.valid_i = 0
                raise StopIteration()