示例#1
0
    def register_usage(self, fu_device_id, bo_device_id=None):
        """
        Register usage of connector's forward_matrix.

        :param fu_device_id: context in which `forward_matrix` will be used
        :param bo_device_id: context in which `backward_matrix`
                                    of the connector will be calculated
        :return: the forward matrix for `fu_device_id` when `bo_device_id`
                 is None, otherwise the pair
                 `(forward matrix, backward matrix for bo_device_id)`
        :raises ValueError: if `bo_device_id` is given while the connector
                            is not bpropagable
        """

        # Test against None explicitly, matching the `is None` check below:
        # a falsy id such as 0 must not slip past this guard.
        if not self.bpropagable and bo_device_id is not None:
            raise ValueError(
                "Nobody is going to use computation from backward step. "
                "You mustn't register for backward propagate!")
        # Lazily create a forward buffer (and a context) for a device other
        # than the one stored in `_fo_device_id`.
        # NOTE(review): unlike the backward loop below, this replaces any
        # Context already stored for `fu_device_id` -- confirm intended.
        if fu_device_id != self._fo_device_id and fu_device_id not in self._f_matrices:
            self._f_matrices[fu_device_id] = Matrix.empty_like(
                self, fu_device_id)
            self.context[fu_device_id] = Context(fu_device_id)
        if bo_device_id is None:
            return self._f_matrices[fu_device_id]

        # Make sure a backward buffer exists both for `_bu_device_id` and for
        # the device that will produce the derivative.
        for device_id in [self._bu_device_id, bo_device_id]:
            if device_id not in self._b_matrices:
                self._b_matrices[device_id] = Matrix.empty_like(
                    self, device_id)
                if device_id not in self.context:
                    self.context[device_id] = Context(device_id)
        # When the two devices differ, an extra buffer is kept in the pool
        # under `_bu_device_id`.
        if self._bu_device_id != bo_device_id and self._bu_device_id not in self._b_matrices_pool:
            self._b_matrices_pool[self._bu_device_id] = Matrix.empty_like(
                self, self._bu_device_id)
        return self._f_matrices[fu_device_id], self._b_matrices[bo_device_id]
示例#2
0
    def __init__(self, x, nonlinearity, device_id=None):
        """
        Set up an element-wise nonlinearity applied to the connector `x`.

        :param x: input connector; registered for forward use and, when it
                  is bpropagable, for backward use as well
        :param nonlinearity: one of "sigmoid", "tanh" or "relu"
        :param device_id: passed to `Context`; the id used afterwards is
                          read back from the created context
        :raises ValueError: for "softmax" and for any other unsupported
                            nonlinearity name
        """
        # Validate first so that a bad name fails before register_usage is
        # called on `x` and before any context or buffer is created.
        if nonlinearity == "softmax":
            raise ValueError("For softmax nonlinearity use SoftmaxBlock!")
        if nonlinearity not in ("sigmoid", "tanh", "relu"):
            raise ValueError(
                "Unsupported nonlinearity: {!r}".format(nonlinearity))

        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.learning = x.bpropagable
        if self.learning:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
            self._df_dpref = Matrix.empty_like(self.x, device_id)
        else:
            self.x = x.register_usage(device_id)
        output = Matrix.empty_like(x, device_id)
        # The output connector only gets a backward device while learning.
        self.output = Connector(output, device_id if self.learning else None)
        # The supported names match the method names on the forward matrix.
        self.f = getattr(self.x, nonlinearity)
        self.training_mode = True
示例#3
0
    def register_usage(self, fu_device_id, bo_device_id=None):
        """
        Register usage of connector's forward_matrix.

        :param fu_device_id: context in which `forward_matrix` will be used
        :param bo_device_id: context in which `backward_matrix`
                                    of the connector will be calculated
        :return: the forward matrix for `fu_device_id` when `bo_device_id`
                 is None, otherwise the pair
                 `(forward matrix, backward matrix for bo_device_id)`
        :raises ValueError: if `bo_device_id` is given while the connector
                            is not bpropagable
        """

        # An explicit None test (as used further down) keeps a falsy id such
        # as 0 from bypassing the guard.
        if not self.bpropagable and bo_device_id is not None:
            raise ValueError("Nobody is going to use computation from backward step. "
                             "You mustn't register for backward propagate!")
        # Create the forward buffer and its context on first use of a device
        # different from `_fo_device_id`.
        if fu_device_id != self._fo_device_id and fu_device_id not in self._f_matrices:
            self._f_matrices[fu_device_id] = Matrix.empty_like(self, fu_device_id)
            self.context[fu_device_id] = Context(fu_device_id)
        if bo_device_id is None:
            return self._f_matrices[fu_device_id]

        # Backward buffers are needed for `_bu_device_id` and for the device
        # on which the derivative will be calculated.
        for device_id in [self._bu_device_id, bo_device_id]:
            if device_id not in self._b_matrices:
                self._b_matrices[device_id] = Matrix.empty_like(self, device_id)
                if device_id not in self.context:
                    self.context[device_id] = Context(device_id)
        # A pool buffer keyed by `_bu_device_id` is added when the derivative
        # comes from a different device.
        if self._bu_device_id != bo_device_id and self._bu_device_id not in self._b_matrices_pool:
            self._b_matrices_pool[self._bu_device_id] = Matrix.empty_like(self, self._bu_device_id)
        return self._f_matrices[fu_device_id], self._b_matrices[bo_device_id]
示例#4
0
    def __init__(self, R, b, grad_clipping, mask, prev_c, prev_h, device_id=None):
        """
        Register the block's inputs and allocate its working buffers.

        Judging by the z/i/f/o gate buffers this is presumably an LSTM-style
        cell -- TODO confirm against the enclosing class.

        :param R: connector; `nrows` of its forward matrix gives the width
                  of a single gate
        :param b: connector registered the same way as `R`
        :param grad_clipping: stored as-is on the instance
        :param mask: registered for forward use only when truthy
        :param prev_c: connector; `nrows` of its forward matrix is used as
                       the batch size
        :param prev_h: connector registered the same way as `prev_c`
        :param device_id: passed to `Context`; the id used afterwards is
                          read back from the created context
        """
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id

        # Inputs that are bpropagable are registered for backward use too.
        if not R.bpropagable:
            self.R = R.register_usage(device_id)
        else:
            self.R, self.dL_dR = R.register_usage(device_id, device_id)
            self.R_b_context = Context(device_id)

        if not b.bpropagable:
            self.b = b.register_usage(device_id)
        else:
            self.b, self.dL_db = b.register_usage(device_id, device_id)
            self.b_b_context = Context(device_id)

        self.grad_clipping = grad_clipping
        if mask:
            self.mask = mask.register_usage(device_id)

        if not prev_c.bpropagable:
            self.prev_c = prev_c.register_usage(device_id)
        else:
            self.prev_c, self.dL_dprev_c = prev_c.register_usage(device_id, device_id)

        if not prev_h.bpropagable:
            self.prev_h = prev_h.register_usage(device_id)
        else:
            self.prev_h, self.dL_dprev_h = prev_h.register_usage(device_id, device_id)

        # NOTE(review): `b.bpropagable` does not take part in this flag --
        # confirm that is intended.
        self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
        if self.learning:
            self.b_context = Context(device_id)
        backward_device_id = device_id if self.learning else None

        gate_width = self.R.nrows
        n_rows = self.prev_c.nrows

        # One buffer holds the four gates side by side; z, i, f, o are column
        # slices of it, in that order.
        self.zifo = Matrix.empty(n_rows, 4 * gate_width, device_id=device_id)
        self.z, self.i, self.f, self.o = [
            self.zifo[:, k * gate_width:(k + 1) * gate_width] for k in range(4)]

        c_buffer = Matrix.empty_like(self.prev_c, device_id)
        self.c = Connector(c_buffer, backward_device_id)
        self.tanh_c = Matrix.empty_like(self.c, device_id)
        h_buffer = Matrix.empty_like(self.c, device_id)
        self.h = Connector(h_buffer, backward_device_id)

        if not self.learning:
            return

        derivatives = Matrix.empty_like(self.zifo)
        self._dzifo_dpre_zifo = derivatives
        self.dz_dpre_z, self.di_dpre_i, self.df_dpre_f, self.do_dpre_o = [
            derivatives[:, k * gate_width:(k + 1) * gate_width] for k in range(4)]
        # The dL_dpre_* names are aliases of the very same buffers.
        self.dL_dpre_zifo = derivatives
        self.dL_dpre_z = self.dz_dpre_z
        self.dL_dpre_i = self.di_dpre_i
        self.dL_dpre_f = self.df_dpre_f
        self.dL_dpre_o = self.do_dpre_o
        self._dtanh_c_dc = Matrix.empty_like(self.c)
示例#5
0
    def bprop(self):
        """
        Return the backward matrix stored for `_bu_device_id`.

        Dense matrices registered under other device ids are copied through
        the pool buffer and added into it; the sparse matrix, when set, is
        added as well. With nothing registered a zero-filled dense matrix is
        created, stored and returned; with only a sparse matrix present that
        sparse matrix is returned as-is.

        :raises ValueError: if the connector is not bpropagable
        """
        if not self.bpropagable:
            raise ValueError(
                'Nobody was going to use computation from backward '
                'step. You should not backward propagate!')

        if not self._b_matrices:
            if self._b_sparse_matrix:
                return self._b_sparse_matrix
            # When no one registered for providing derivatives zero dense
            # matrix will be returned
            zeros = Matrix.empty_like(self, self._bu_device_id)
            if self._bu_device_id not in self.context:
                self.context[self._bu_device_id] = Context(self._bu_device_id)
            zeros.fill(self.context[self._bu_device_id], 0.0)
            self._b_matrices[self._bu_device_id] = zeros
            return zeros

        target_id = self._bu_device_id
        for source_id, source_matrix in self._b_matrices.iteritems():
            if source_id == target_id:
                continue
            # Copy the foreign matrix into the pool buffer, then accumulate.
            staging = self._b_matrices_pool[target_id]
            target_context = self.context[target_id]
            staging.assign(target_context, source_matrix)
            self._b_matrices[target_id].add(target_context, staging)
        if self._b_sparse_matrix:
            self._b_matrices[target_id].add(
                self.context[target_id], self._b_sparse_matrix)
        return self._b_matrices[target_id]
示例#6
0
 def __init__(self, parameters, learning_rate_policy, momentum_policy, ema_decay=0.9, epsilon=1e-6):
     """
     Store the hyper-parameters and allocate per-parameter state.

     For every entry of `parameters` two buffers are created with
     `Matrix.empty_like` and filled with 0.0 (one in `grad_sqr`, one in
     `velocity`), and one `Context` is created on its `device_id`.

     :param parameters: iterable of parameters to keep state for
     :param learning_rate_policy: stored as-is
     :param momentum_policy: stored as-is
     :param ema_decay: stored as-is
     :param epsilon: stored as-is
     """
     def zero_buffer(param):
         # Allocate a buffer like `param` and zero it synchronously.
         buf = Matrix.empty_like(param)
         buf.sync_fill(0.0)
         return buf

     self.parameters = parameters
     self.learning_rate_policy = learning_rate_policy
     self.momentum_policy = momentum_policy
     self.ema_decay = ema_decay
     self.epsilon = epsilon

     self.grad_sqr = []
     self.velocity = []
     for param in self.parameters:
         self.grad_sqr.append(zero_buffer(param))
         self.velocity.append(zero_buffer(param))

     contexts = []
     for param in parameters:
         contexts.append(Context(param.device_id))
     self.contexts = contexts
     self.blocking_contexts = []
示例#7
0
    def __init__(self, probs, true_labels, schedule, seed, device_id=None):
        """
        Register forward usage of both inputs and create the output connector.

        :param probs: connector registered for forward use
        :param true_labels: connector registered for forward use; the output
                            buffer is created with `Matrix.empty_like` from
                            its forward matrix
        :param schedule: stored as-is
        :param seed: seed for the instance's `np.random.RandomState`
        :param device_id: passed to `Context`; the id used afterwards is
                          read back from the created context
        """
        self.schedule = schedule
        self.rnd = np.random.RandomState(seed)

        context = Context(device_id)
        self.context = context
        device_id = context.device_id

        self.probs = probs.register_usage(device_id)
        labels = true_labels.register_usage(device_id)
        self.true_labels = labels
        # The output connector is created without a backward device.
        output_buffer = Matrix.empty_like(labels)
        self.output = Connector(output_buffer)
示例#8
0
 def __init__(self, parameters, learning_rate_policy, beta1=0.9, beta2=0.999, epsilon=1e-8):
     """
     Store the hyper-parameters and allocate per-parameter state.

     For every entry of `parameters` an `m` and a `v` buffer are created
     with `Matrix.empty_like` and filled with 0.0, and a `Context` is
     created on the parameter's `device_id`.

     :param parameters: iterable of parameters to keep state for
     :param learning_rate_policy: stored as-is
     :param beta1: stored as-is
     :param beta2: stored as-is
     :param epsilon: stored as-is
     """
     self.parameters = parameters
     self.learning_rate_policy = learning_rate_policy
     self.beta1 = beta1
     self.beta2 = beta2
     self.epsilon = epsilon
     self.blocking_contexts = []
     self.iteration = 0

     self.m = []
     self.v = []
     self.contexts = []
     for param in self.parameters:
         # Same allocation order as the buffers are named: m first, then v.
         for state in (self.m, self.v):
             zeros = Matrix.empty_like(param)
             zeros.sync_fill(0.0)
             state.append(zeros)
         self.contexts.append(Context(param.device_id))
示例#9
0
 def __init__(self, x, device_id=None):
     """
     Register usage of the connector `x` and create the output connector.

     :param x: input connector; registered for forward use and, when it is
               bpropagable, for backward use as well
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     self.context = Context(device_id)
     device_id = self.context.device_id
     self.learning = x.bpropagable
     # Both branches already bind the forward matrix to `self.x`, so a
     # further forward-only registration afterwards is not needed.
     if self.learning:
         self.x, self.dL_dx = x.register_usage(device_id, device_id)
     else:
         self.x = x.register_usage(device_id)
     # The output connector only gets a backward device while learning.
     self.output = Connector(Matrix.empty_like(self.x), device_id if self.learning else None)
示例#10
0
 def __init__(self, parameters, learning_rate_policy, momentum_policy):
     """
     Store the policies and allocate one velocity buffer per parameter.

     Each velocity buffer is created with `Matrix.empty_like` and filled
     with 0.0; one `Context` is created per parameter on its `device_id`.

     :param parameters: iterable of parameters to keep state for
     :param learning_rate_policy: stored as-is
     :param momentum_policy: stored as-is
     """
     self.parameters = parameters
     self.learning_rate_policy = learning_rate_policy
     self.momentum_policy = momentum_policy

     self.velocity = []
     for param in self.parameters:
         zeros = Matrix.empty_like(param)
         zeros.sync_fill(0.0)
         self.velocity.append(zeros)

     contexts = []
     for param in parameters:
         contexts.append(Context(param.device_id))
     self.contexts = contexts
     self.blocking_contexts = []
示例#11
0
 def __init__(self, x):
     """
     Register usage of every element of `x` and create the output connector.

     The device and the learning flag are taken from `x[0]`.

     :param x: indexable collection exposing `register_usage` and `length`
     """
     head = x[0]
     device_id = head.device_id
     learning = head.bpropagable
     self.context = Context(device_id)

     # The output connector only gets a backward device while learning.
     backward_device_id = device_id if learning else None
     self.output = Connector(Matrix.empty_like(head), backward_device_id)

     if not learning:
         self.x = x.register_usage(device_id)
     else:
         # Split the registration result into forward and backward parts.
         registered = x.register_usage(device_id, device_id)
         self.x, self.dL_dx = izip(*registered)
     self.last_idx = x.length - 1
示例#12
0
 def __init__(self, x):
     """
     Set up the context, the output connector and the registered inputs.

     `x[0]` supplies both the device id and the bpropagable flag.

     :param x: indexable collection exposing `register_usage` and `length`
     """
     first = x[0]
     device_id, learning = first.device_id, first.bpropagable
     self.context = Context(device_id)

     output_buffer = Matrix.empty_like(first)
     if learning:
         self.output = Connector(output_buffer, device_id)
         # Split the registration result into forward and backward parts.
         self.x, self.dL_dx = izip(*x.register_usage(device_id, device_id))
     else:
         self.output = Connector(output_buffer, None)
         self.x = x.register_usage(device_id)
     self.last_idx = x.length - 1
 def __init__(self, matrices, device_id=None):
     """
     Register usage of `matrices` and create the output connector.

     The output buffer and the learning flag are derived from `matrices[0]`.

     :param matrices: indexable collection exposing `register_usage` and
                      `length`
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     context = Context(device_id)
     self.context = context
     device_id = context.device_id

     first = matrices[0]
     output_buffer = Matrix.empty_like(first, device_id)
     learning = first.bpropagable
     # The output connector only gets a backward device while learning.
     self.output = Connector(output_buffer, device_id if learning else None)

     if not learning:
         self.matrices = matrices.register_usage(device_id)
     else:
         registered = matrices.register_usage(device_id, device_id)
         self.matrices, self.dL_dmatrices = izip(*registered)
     self.length = matrices.length
示例#14
0
 def __init__(self, parameters, learning_rate_policy, momentum_policy):
     """
     Keep the policies and build zero-filled velocity state.

     A velocity buffer (`Matrix.empty_like` followed by a 0.0 fill) and a
     `Context` on the parameter's `device_id` are created per parameter.

     :param parameters: iterable of parameters to keep state for
     :param learning_rate_policy: stored as-is
     :param momentum_policy: stored as-is
     """
     def zero_buffer(param):
         # Allocate a buffer like `param` and zero it synchronously.
         buf = Matrix.empty_like(param)
         buf.sync_fill(0.0)
         return buf

     self.parameters = parameters
     self.velocity = []
     for param in self.parameters:
         self.velocity.append(zero_buffer(param))
     self.learning_rate_policy = learning_rate_policy
     self.momentum_policy = momentum_policy

     per_parameter_contexts = []
     for param in parameters:
         per_parameter_contexts.append(Context(param.device_id))
     self.contexts = per_parameter_contexts
     self.blocking_contexts = []
示例#15
0
 def __init__(self, x, true_labels, mask=None, device_id=None):
     """
     Register the inputs and create the `probs` connector.

     :param x: connector; registered for backward use too when bpropagable
     :param true_labels: connector registered for forward use
     :param mask: registered for forward use only when truthy
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     context = Context(device_id)
     self.context = context
     device_id = context.device_id

     if not x.bpropagable:
         self.x = x.register_usage(device_id)
     else:
         self.x, self.dL_dx = x.register_usage(device_id, device_id)

     self.true_labels = true_labels.register_usage(device_id)
     if mask:
         self.mask = mask.register_usage(device_id)

     # `probs` is created without a backward device.
     probs_buffer = Matrix.empty_like(self.x)
     self.probs = Connector(probs_buffer)
     self.loss = None
示例#16
0
 def __init__(self, matrices, device_id=None):
     """
     Create the output connector and register usage of `matrices`.

     `matrices[0]` provides the template for the output buffer and the
     bpropagable flag.

     :param matrices: indexable collection exposing `register_usage` and
                      `length`
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     self.context = Context(device_id)
     device_id = self.context.device_id

     template = matrices[0]
     output_buffer = Matrix.empty_like(template, device_id)
     learning = template.bpropagable

     if learning:
         self.output = Connector(output_buffer, device_id)
         # Split the registration result into forward and backward parts.
         forward_and_backward = matrices.register_usage(device_id, device_id)
         self.matrices, self.dL_dmatrices = izip(*forward_and_backward)
     else:
         self.output = Connector(output_buffer, None)
         self.matrices = matrices.register_usage(device_id)
     self.length = matrices.length
示例#17
0
 def __init__(self, x, true_labels, mask=None, device_id=None):
     """
     Set up the context, the registered inputs and the `probs` connector.

     :param x: connector; registered for backward use too when bpropagable
     :param true_labels: connector registered for forward use
     :param mask: registered for forward use only when truthy
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     self.context = Context(device_id)
     device_id = self.context.device_id

     if x.bpropagable:
         registered = x.register_usage(device_id, device_id)
         self.x, self.dL_dx = registered
     else:
         self.x = x.register_usage(device_id)

     labels = true_labels.register_usage(device_id)
     self.true_labels = labels
     if mask:
         self.mask = mask.register_usage(device_id)

     # `probs` wraps a fresh buffer created from the forward matrix of `x`.
     self.probs = Connector(Matrix.empty_like(self.x))
     self.loss = None
示例#18
0
 def __init__(self, dropout_prob, x, seed=42, device_id=None):
     """
     Register usage of `x` and create the generator and output connector.

     :param dropout_prob: stored as-is
     :param x: connector; registered for backward use too when bpropagable
     :param seed: passed to `Matrix.get_random_generator`
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     self.dropout_prob = dropout_prob

     forward_context = Context(device_id)
     self.f_context = forward_context
     device_id = forward_context.device_id

     self.generator = Matrix.get_random_generator(seed)

     learning = x.bpropagable
     if not learning:
         self.x = x.register_usage(device_id)
     else:
         # A separate backward context exists only when `x` is bpropagable.
         self.b_context = Context(device_id)
         self.x, self.dL_dx = x.register_usage(device_id, device_id)

     output_buffer = Matrix.empty_like(self.x)
     self.output = Connector(output_buffer, device_id if learning else None)
     self.training_mode = True
示例#19
0
 def __init__(self, matrices, u, mask=None, device_id=None):
     """
     Register the inputs and allocate the output and weight buffers.

     :param matrices: indexable collection exposing `register_usage` and
                      `length`; `matrices[0]` is the template for the output
     :param u: connector; registered for backward use too when bpropagable
     :param mask: registered for forward use only when truthy
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     self.context = Context(device_id)
     device_id = self.context.device_id

     first = matrices[0]
     output_buffer = Matrix.empty_like(first, device_id)
     # The output is bpropagable when either input is.
     learning = first.bpropagable or u.bpropagable
     self.output = Connector(output_buffer, device_id if learning else None)

     if not first.bpropagable:
         self.matrices = matrices.register_usage(device_id)
     else:
         registered = matrices.register_usage(device_id, device_id)
         self.matrices, self.dL_dmatrices = izip(*registered)
     self.length = matrices.length

     if not u.bpropagable:
         self.u = u.register_usage(device_id)
     else:
         self.u, self.dL_du = u.register_usage(device_id, device_id)

     if mask:
         self.mask = mask.register_usage(device_id)

     # `a` has one column per element of `matrices`; `a_cols` keeps a slice
     # for each column.
     self.a = Matrix.empty(first.nrows, matrices.length, 'float', device_id)
     self.dL_dpre_a = Matrix.empty_like(self.a)
     n_columns = len(self.matrices)
     self.a_cols = [self.a[:, column] for column in xrange(n_columns)]
示例#20
0
 def __init__(self, matrices, u, mask=None, device_id=None):
     """
     Set up the context, the registered inputs and the working buffers.

     :param matrices: indexable collection exposing `register_usage` and
                      `length`; `matrices[0]` is the template for the output
     :param u: connector; registered for backward use too when bpropagable
     :param mask: registered for forward use only when truthy
     :param device_id: passed to `Context`; the id used afterwards is read
                       back from the created context
     """
     context = Context(device_id)
     self.context = context
     device_id = context.device_id

     template = matrices[0]
     output_buffer = Matrix.empty_like(template, device_id)
     learning = template.bpropagable or u.bpropagable
     backward_device_id = device_id if learning else None
     self.output = Connector(output_buffer, backward_device_id)

     if template.bpropagable:
         # Split the registration result into forward and backward parts.
         self.matrices, self.dL_dmatrices = izip(
             *matrices.register_usage(device_id, device_id))
     else:
         self.matrices = matrices.register_usage(device_id)
     self.length = matrices.length

     if u.bpropagable:
         registered_u = u.register_usage(device_id, device_id)
         self.u, self.dL_du = registered_u
     else:
         self.u = u.register_usage(device_id)

     if mask:
         self.mask = mask.register_usage(device_id)

     weights = Matrix.empty(template.nrows, matrices.length, 'float',
                            device_id)
     self.a = weights
     self.dL_dpre_a = Matrix.empty_like(weights)
     # One column slice of `a` per registered matrix.
     self.a_cols = [weights[:, idx] for idx in xrange(len(self.matrices))]
示例#21
0
    def bprop(self):
        """
        Collect the derivatives into the matrix kept for `_bu_device_id`.

        Every dense matrix stored under a different device id is assigned to
        the pool buffer and then added to the result; the sparse matrix is
        added afterwards when it is set. If nothing dense is registered, the
        sparse matrix is returned when present, otherwise a new zero-filled
        dense matrix is stored and returned.

        :raises ValueError: if the connector is not bpropagable
        """
        if not self.bpropagable:
            raise ValueError('Nobody was going to use computation from backward '
                             'step. You should not backward propagate!')

        nothing_dense = not self._b_matrices
        if nothing_dense and not self._b_sparse_matrix:
            # When no one registered for providing derivatives zero dense
            # matrix will be returned
            device_id = self._bu_device_id
            zero_matrix = Matrix.empty_like(self, device_id)
            if device_id not in self.context:
                self.context[device_id] = Context(device_id)
            zero_matrix.fill(self.context[device_id], 0.0)
            self._b_matrices[device_id] = zero_matrix
            return zero_matrix
        if nothing_dense:
            return self._b_sparse_matrix

        device_id = self._bu_device_id
        for origin_id, origin_matrix in self._b_matrices.iteritems():
            if device_id != origin_id:
                # Stage the foreign matrix in the pool buffer, then add it.
                pool_buffer = self._b_matrices_pool[device_id]
                pool_buffer.assign(self.context[device_id], origin_matrix)
                self._b_matrices[device_id].add(self.context[device_id], pool_buffer)
        if self._b_sparse_matrix:
            self._b_matrices[device_id].add(self.context[device_id], self._b_sparse_matrix)
        return self._b_matrices[device_id]
示例#22
0
    def __init__(self,
                 R,
                 b,
                 grad_clipping,
                 mask,
                 prev_c,
                 prev_h,
                 device_id=None):
        """
        Register the inputs and allocate gate, state and derivative buffers.

        The z/i/f/o gate layout suggests an LSTM-style cell -- TODO confirm
        against the enclosing class.

        :param R: connector; `nrows` of its forward matrix gives the width
                  of a single gate
        :param b: connector registered the same way as `R`
        :param grad_clipping: stored as-is on the instance
        :param mask: registered for forward use only when truthy
        :param prev_c: connector; `nrows` of its forward matrix is used as
                       the batch size
        :param prev_h: connector registered the same way as `prev_c`
        :param device_id: passed to `Context`; the id used afterwards is
                          read back from the created context
        """
        forward_context = Context(device_id)
        self.f_context = forward_context
        device_id = forward_context.device_id

        # R and b each get their own backward context when bpropagable.
        if R.bpropagable:
            registered = R.register_usage(device_id, device_id)
            self.R, self.dL_dR = registered
            self.R_b_context = Context(device_id)
        else:
            self.R = R.register_usage(device_id)

        if b.bpropagable:
            registered = b.register_usage(device_id, device_id)
            self.b, self.dL_db = registered
            self.b_b_context = Context(device_id)
        else:
            self.b = b.register_usage(device_id)

        self.grad_clipping = grad_clipping
        if mask:
            self.mask = mask.register_usage(device_id)

        if prev_c.bpropagable:
            registered = prev_c.register_usage(device_id, device_id)
            self.prev_c, self.dL_dprev_c = registered
        else:
            self.prev_c = prev_c.register_usage(device_id)

        if prev_h.bpropagable:
            registered = prev_h.register_usage(device_id, device_id)
            self.prev_h, self.dL_dprev_h = registered
        else:
            self.prev_h = prev_h.register_usage(device_id)

        # NOTE(review): `b.bpropagable` is not part of this flag -- confirm
        # that is intended.
        self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
        if self.learning:
            self.b_context = Context(device_id)

        width = self.R.nrows
        rows = self.prev_c.nrows
        bounds = [(k * width, (k + 1) * width) for k in range(4)]

        # The four gates share one buffer and are exposed as column slices.
        gates = Matrix.empty(rows, 4 * width, device_id=device_id)
        self.zifo = gates
        self.z, self.i, self.f, self.o = [
            gates[:, start:stop] for start, stop in bounds]

        self.c = Connector(Matrix.empty_like(self.prev_c, device_id),
                           device_id if self.learning else None)
        self.tanh_c = Matrix.empty_like(self.c, device_id)
        self.h = Connector(Matrix.empty_like(self.c, device_id),
                           device_id if self.learning else None)

        if self.learning:
            gate_derivatives = Matrix.empty_like(self.zifo)
            self._dzifo_dpre_zifo = gate_derivatives
            (self.dz_dpre_z,
             self.di_dpre_i,
             self.df_dpre_f,
             self.do_dpre_o) = [
                gate_derivatives[:, start:stop] for start, stop in bounds]
            # Each dL_dpre_* attribute refers to the same buffer as its
            # d*_dpre_* counterpart.
            self.dL_dpre_zifo = gate_derivatives
            self.dL_dpre_z = self.dz_dpre_z
            self.dL_dpre_i = self.di_dpre_i
            self.dL_dpre_f = self.df_dpre_f
            self.dL_dpre_o = self.do_dpre_o
            self._dtanh_c_dc = Matrix.empty_like(self.c)