Example #1
def evals(net, adata, alabel, batch_size):
    hidden = net.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=mx.cpu())
    dataLoader = DataLoader(adata, alabel)
    tl = 0
    for data, label in dataLoader.dataIter(batch_size):
        label = nd.array(label)
        #label = nd.ones(shape=(5,batch_size)) * label
        #label = label.reshape((-1,))
        dd = nd.array(data.reshape((batch_size, 5, 11)).swapaxes(0, 1))
        #hidden = detach(hidden)
        output, hidden = net(dd, hidden)
        output = output.reshape((5, batch_size, 1))
        output = nd.sum(output, axis=0) / 5
        lv = loss(output, label)

        tl += nd.sum(lv).asscalar()
    return tl / len(adata)
Example #2
def grad_clipping(params, theta, ctx):
    """Gradient clipping."""
    if theta is not None:
        norm = nd.array([0.0], ctx)
        for p in params:
            norm += nd.sum(p.grad ** 2)
        norm = nd.sqrt(norm).asscalar()
        if norm > theta:
            for p in params:
                p.grad[:] *= theta / norm
Example #3
def grad_clipping(params, clipping_norm, ctx):
    """Gradient clipping."""
    if clipping_norm is not None:
        norm = nd.array([0.0], ctx)
        for p in params:
            norm += nd.sum(p.grad ** 2)
        norm = nd.sqrt(norm).asscalar()
        if norm > clipping_norm:
            for p in params:
                p.grad[:] *= clipping_norm / norm
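A minimal usage sketch, not part of the original listing: it only shows where the grad_clipping above (Examples #2 and #3) sits between loss.backward() and the parameter update. The toy parameters, data, loss, and learning rate are all invented for illustration.

import mxnet as mx
from mxnet import nd, autograd

ctx = mx.cpu()
w = nd.random.normal(shape=(3, 1), ctx=ctx)
b = nd.zeros((1,), ctx=ctx)
params = [w, b]
for p in params:
    p.attach_grad()

x = nd.random.normal(shape=(4, 3), ctx=ctx)
y = nd.random.normal(shape=(4, 1), ctx=ctx)
with autograd.record():
    l = nd.sum((nd.dot(x, w) + b - y) ** 2)   # toy squared-error loss
l.backward()

grad_clipping(params, 5.0, ctx)               # rescale gradients if their global L2 norm exceeds 5
for p in params:
    p[:] = p - 0.01 * p.grad                  # plain SGD step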
Example #4
def evaluate(ctx, net, data_loader):
    n, total_succ = 0, 0

    for X, y_expect in data_loader:
        X, y_expect = X.as_in_context(ctx), y_expect.as_in_context(ctx)

        y = net(X)
        total_succ += nd.sum(y.argmax(axis=1) == y_expect).asscalar()
        n += X.shape[0]

    return 100 * total_succ / n
Example #5
def predict(net, data, label):
    data = nd.array(data)
    label = nd.array(label)
    hidden = net.begin_state(func=mx.nd.zeros, batch_size=data.shape[0], ctx=mx.cpu())
    dd = nd.array(data.reshape((data.shape[0], 5, 11)).swapaxes(0, 1))
    output, hidden = net(dd, hidden)
    output = output.reshape((5, data.shape[0], 1))
    output = nd.sum(output, axis=0) / 5
    l = nd.argmax(output, axis=1)
    res = nd.mean(l == label)
    return res.asscalar()
Example #6
    def forward(self, X, lrp_aware=False):
        '''
        Realizes the forward pass of an input through the convolution layer.

        Parameters
        ----------
        X :         mxnet.ndarray.ndarray.NDArray
                    a network input, shaped (N,H,W,D), with
                    N = batch size
                    H, W, D = input size in height, width, depth

        lrp_aware : bool
                    controls whether the forward pass is to be computed with awareness of multiple following
                    LRP calls. This sacrifices speed in the forward pass but saves time if multiple LRP
                    calls follow for the current X, e.g. with different parameter settings or for multiple
                    target classes.

        Returns
        -------
        Y :         mxnet.ndarray.ndarray.NDArray
                    the layer outputs.
        '''

        self.lrp_aware = lrp_aware

        self.X = X
        N,H,W,D = X.shape

        hf, wf, df, nf  = self.W.shape
        hstride, wstride = self.stride
        numfilters = self.n

        #assume the given pooling and stride parameters are carefully chosen.
        Hout = (H - hf) // hstride + 1
        Wout = (W - wf) // wstride + 1


        #initialize pooled output
        self.Y = nd.zeros((N,Hout,Wout,numfilters), ctx=self.ctx, dtype=self.dtype)

        if self.lrp_aware:
            self.Z = nd.zeros((N, Hout, Wout, hf, wf, df, nf), ctx=self.ctx, dtype=self.dtype) #initialize container for precomputed forward messages
            for i in range(Hout):
                for j in range(Wout):
                    self.Z[:,i,j,...] = nd.expand_dims(self.W, axis=0) * nd.expand_dims(self.X[:, i*hstride:i*hstride+hf , j*wstride:j*wstride+wf , :], axis=4) # N, hf, wf, df, nf
                    self.Y[:,i,j,:] = self.Z[:,i,j,...].sum(axis=(1,2,3)) + self.B
        else:
            for i in range(Hout):
                for j in range(Wout):
                    self.Y[:,i,j,:] = nd.sum( nd.expand_dims( X[:, i*hstride:i*hstride+hf: , j*wstride:j*wstride+wf: , : ].transpose((1,2,3,0)), 4) * nd.expand_dims(self.W, 3), axis=(0,1,2))  + self.B

        return self.Y
Example #7
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #8
File: func.py  Project: chr5tphr/ecGAN
def stats_batchwise(x_bat, y_bat, n, x_mean, y_mean, x_var=None, y_var=None, xx_cov=None, yy_cov=None, xy_cov=None, x_mean_skip=False, y_mean_skip=False):
    m = x_bat.shape[0]

    x_bat_mean = x_bat.mean(axis=0, keepdims=True)
    y_bat_mean = y_bat.mean(axis=0, keepdims=True)

    dx = x_bat - x_bat_mean
    dy = y_bat - y_bat_mean

    if x_var is not None:
        x_bat_var = nd.sum(dx**2, axis=0)
        x_var += x_bat_var + ((x_mean - x_bat_mean)**2) * n * m / (n+m)

    if y_var is not None:
        y_bat_var = nd.sum(dy**2, axis=0)
        y_var += y_bat_var + ((y_mean - y_bat_mean)**2) * n * m / (n+m)

    if xx_cov is not None:
        xx_bat_cov = nd.dot(dx, dx, transpose_a=True)
        xx_cov += xx_bat_cov + nd.dot((x_mean - x_bat_mean), (x_mean - x_bat_mean), transpose_a=True) * n * m / (n+m)

    if yy_cov is not None:
        yy_bat_cov = nd.dot(dy, dy, transpose_a=True)
        yy_cov += yy_bat_cov + nd.dot((y_mean - y_bat_mean), (y_mean - y_bat_mean), transpose_a=True) * n * m / (n+m)

    if xy_cov is not None:
        xy_bat_cov = nd.dot(dy, dx, transpose_a=True)
        xy_cov += xy_bat_cov + nd.dot((y_mean - y_bat_mean), (x_mean - x_bat_mean), transpose_a=True) * n * m / (n+m)

    if not x_mean_skip:
        x_mean = (n * x_mean + m * x_bat_mean) / (n+m)

    if not y_mean_skip:
        y_mean = (n * y_mean + m * y_bat_mean) / (n+m)

    n += m

    return n, x_mean, y_mean, x_var, y_var, xx_cov, yy_cov, xy_cov
Example #9
def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc = nd.array([0])
    n = 0.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for batch in data_iterator:
        data, label, batch_size = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            acc += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu())
            n += y.size
        acc.wait_to_read()  # don't push too many operators into backend
    return acc.asscalar() / n
Example #10
def train_one_epoch(ctx, net, floss, trainer, data_loader):
    n, total_loss, total_succ = 0, 0, 0

    for X, y_expect in data_loader:
        X, y_expect = X.as_in_context(ctx), y_expect.as_in_context(ctx)

        with autograd.record():
            y = net(X)
            loss = floss(y, y_expect)

        loss.backward()
        trainer.step(X.shape[0])

        total_loss += loss.sum().asscalar()
        total_succ += nd.sum(y.argmax(axis=1) == y_expect).asscalar()
        n += X.shape[0]

    return 100 * total_succ / n, total_loss / n
Example #11
    def _spectral_norm(self):
        """ spectral normalization """
        w = self.params.get('weight').data(self.ctx)
        w_mat = nd.reshape(w, [w.shape[0], -1])

        _u = self.u.data(self.ctx)
        _v = None

        for _ in range(POWER_ITERATION):
            _v = nd.L2Normalization(nd.dot(_u, w_mat))
            _u = nd.L2Normalization(nd.dot(_v, w_mat.T))

        sigma = nd.sum(nd.dot(_u, w_mat) * _v)
        if sigma == 0.:
            sigma = EPSILON

        self.params.setattr('u', _u)

        return w / sigma
Example #12
File: func.py  Project: chr5tphr/ecGAN
def batchwise_covariance(X, Y):
        meanx = meany = vary = n = C = 0
        for x, y in zip(X, Y):
            m = len(x)
            meanx_ = x.mean(axis=0, keepdims=True)
            meany_ = y.mean(axis=0, keepdims=True)
            dx = x - meanx_
            dy = y - meany_

            C_ = nd.dot(dx, dy, transpose_a=True)
            C += C_ + nd.dot((meanx - meanx_), (meany - meany_), transpose_a=True) * n * m / (n+m)

            vary_ = nd.sum(dy**2, axis=0)
            vary += vary_ + ((meany - meany_)**2) * n * m / (n+m)

            meanx = (n * meanx + m * meanx_) / (n+m)
            meany = (n * meany + m * meany_) / (n+m)
            n += m
        return C / n, vary / n
Example #13
    def update(self, lrate):
        N,Hx,Wx,Dx = self.X.shape
        N,Hy,Wy,NF = self.DY.shape

        hf,wf,df,NF = self.W.shape
        hstride, wstride = self.stride

        DW = nd.zeros_like(self.W,ctx=self.ctx, dtype=self.dtype)

        if not (hf == wf and self.stride == (1,1)):
            for i in range(Hy):
                for j in range(Wy):
                    DW += ( nd.expand_dims(self.X[:, i*hstride:i*hstride+hf , j*wstride:j*wstride+wf , :], axis=4) * nd.expand_dims(self.DY[:,i:i+1,j:j+1,:], axis=3)).sum(axis=0)
        else:
            for i in range(hf):
                for j in range(wf):
                    DW[i,j,:,:] = nd.sum( nd.expand_dims(self.X[:,i:i+Hy:hstride,j:j+Wy:wstride,:], axis=4) * nd.expand_dims(self.DY, axis=3) ,axis=(0,1,2))

        DB = self.DY.sum(axis=(0,1,2))
        self.W -= lrate * DW / (hf*wf*df*Hy*Wy)**.5
        self.B -= lrate * DB / (Hy*Wy)**.5
Example #14
    def forward(self, x):
        x = nd.sqrt(nd.sum(nd.square(x), 2))
        return x
Example #15
File: gru.py  Project: HaoranYi/gitProj
def cross_entropy(yhat, y):
    return - nd.mean(nd.sum(y * nd.log(yhat), axis=0, exclude=True))
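A small hedged check of this cross_entropy with hand-made values (the arrays below are invented for illustration): each row of y is a one-hot target, so the result is the mean negative log-probability of the correct class.

from mxnet import nd

yhat = nd.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]])   # predicted probabilities, one row per sample
y = nd.array([[1, 0, 0],
              [0, 1, 0]])            # one-hot targets
# -(log 0.7 + log 0.8) / 2 ≈ 0.290
print(cross_entropy(yhat, y).asscalar())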
Example #16
smoothing_constant = .01
num_examples = 60000

for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_iter):
        # ==== note the difference in raw data input shape ====
        # use 4d tensor (batch_size, 1, 28, 28)
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_iter, net)
    train_accuracy = evaluate_accuracy(train_iter, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss / num_examples, train_accuracy, test_accuracy))

# show test image
plt.imshow(test_img[0], cmap='Greys_r')
plt.axis('off')
plt.show()

# make prediction
output = net(test_img[0:1])
print(str(np.asscalar(nd.argmax(output, axis=1).asnumpy().astype(np.int8))))
Example #17
def test_ndarray_ones():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    assert a[-1][0] == 1
    assert nd.sum(a).asnumpy() == LARGE_SIZE
Example #18
    def __Htransfer(self, e, wr):
        norm_wr = wr / wr.norm(axis=1, keepdims=True)
        return e - nd.sum(e * norm_wr, 1, True) * norm_wr
Example #19
    def test_op(self,
                num_samples=None,
                num_epochs=None,
                reset=True,
                dataset='test'):
        '''
        Evaluates the model using num_samples.

        Args
        ----
        num_samples: integer, default None
          The number of samples to evaluate on. This is converted to
          evaluating on (num_samples // batch_size) minibatches.
        num_epochs: integer, default None
          The number of epochs to evaluate on. This used if num_samples
          is not specified. If neither is specified, defaults to 1 epoch.
        reset: bool, default True
          Whether to reset the test data index to 0 before iterating
          through and evaluating on minibatches.
        dataset: string, default 'test':
          Which dataset to evaluate on: 'valid' or 'test'.

        Returns
        -------
        Loss_u: float
          The loss on the unlabeled data.
        Loss_l: float
          The loss on the labeled data.
        Eval_u: list of floats
          A list of evaluation metrics on the unlabeled data.
        Eval_l: list of floats
          A list of evaluation metrics on the labeled data.
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx

        if num_samples is None and num_epochs is None:
            # assume full dataset evaluation
            num_epochs = 1

        if reset:
            # Reset Data to Index Zero
            if self.data.data[dataset] is not None:
                self.data.force_reset_data(dataset)
            if self.data.data[dataset + '_with_labels'] is not None:
                self.data.force_reset_data(dataset + '_with_labels')

        # Unlabeled Data
        u_loss = 'NA'
        u_eval = []
        if self.data.data[dataset] is not None:
            u_loss = 0
            if num_samples is None:
                num_samps = self.data.data[dataset].shape[0] * num_epochs
            else:
                num_samps = num_samples
            batches = int(np.ceil(num_samps / self.args['batch_size']))
            batch_iter = range(batches)
            if batches > 1: batch_iter = tqdm(batch_iter, desc='unlabeled')
            for batch in batch_iter:
                # 1. Retrieve data
                docs = self.data.get_documents(key=dataset)
                if self.args['use_kd']:
                    split_on = docs.shape[1] // 2
                    docs, bert_logits = docs[:, :split_on], docs[:, split_on:]
                    # TODO: below is not used, but also may not be necessary
                    t = self.args['kd_softmax_temp']
                    kd_docs = nd.softmax(bert_logits / t) * nd.sum(
                        docs, axis=1, keepdims=True)

                # 2. Compute loss
                y_u = self.Enc(docs)
                y_onehot_u_softmax = nd.softmax(y_u)
                x_reconstruction_u = self.Dec(y_onehot_u_softmax)

                logits = nd.log_softmax(x_reconstruction_u)
                loss_recon_unlabel = nd.sum(-docs * logits, axis=1)

                # 3. Convert to numpy
                u_loss += nd.mean(loss_recon_unlabel).asscalar()
            u_loss /= batches

        # Labeled Data
        l_loss = 0.0
        l_acc = 0.0
        if self.data.data[dataset + '_with_labels'] is not None:
            l_loss = 0
            if num_samples is None:
                num_samps = self.data.data[
                    dataset + '_with_labels'].shape[0] * num_epochs
            else:
                num_samps = num_samples
            batches = int(np.ceil(num_samps / self.args['batch_size']))
            batch_iter = range(batches)
            if batches > 1: batch_iter = tqdm(batch_iter, desc='labeled')
            softmaxCEL = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
            for batch in batch_iter:
                # 1. Retrieve data
                labeled_docs, labels = self.data.get_documents(
                    key=dataset + '_with_labels', split_on=self.data.data_dim)
                # 2. Compute loss
                y_u = self.Enc(labeled_docs)  # encode the labeled batch, not the leftover unlabeled `docs`
                y_onehot_u_softmax = nd.softmax(y_u)
                class_pred = nd.argmax(y_onehot_u_softmax, axis=1)
                l_a = labels[list(range(labels.shape[0])), class_pred]
                l_acc += nd.mean(l_a).asscalar()
                labels = labels / nd.sum(labels, axis=1, keepdims=True)
                l_l = softmaxCEL(y_onehot_u_softmax, labels)

                # 3. Convert to numpy
                l_loss += nd.mean(l_l).asscalar()
            l_loss /= batches
            l_acc /= batches

        return u_loss, l_loss, l_acc
Example #20
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim, 
                          learning_rate, clipping_norm, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1): 
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, 
                                     ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape:(batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params) 
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape:(batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1,))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" % (e, 
                                               exp(train_loss/num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params,
                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs,
                      is_lstm))
            print()
Example #22
def log_loss(output, y):
    yhat = logistic(output)
    return -nd.sum(y * nd.log(yhat) + (1 - y) * nd.log(1 - yhat))
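A hedged sketch of how log_loss might be called; `logistic` is not shown in this example, so a sigmoid stand-in is assumed below, and the scores and targets are invented.

from mxnet import nd

def logistic(z):
    # assumed stand-in for the `logistic` used above
    return 1.0 / (1.0 + nd.exp(-z))

output = nd.array([2.0, -1.0, 0.5])    # raw scores
y = nd.array([1.0, 0.0, 1.0])          # binary targets
print(log_loss(output, y).asscalar())  # summed binary cross-entropy over the three samples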
Example #23
File: test1.py  Project: seekloud/hellorl
def test_net_paras_copy():
    net1 = get_net(10)
    net2 = get_net(10)

    input = nd.arange(2880).reshape((3, 3, 20, 16))

    net1(input)
    net2(input)

    ps1 = net1.collect_params()
    ps2 = net2.collect_params()

    print(str(net1))
    print('----------------------------------')
    print(str(net2))
    print('++++++++++++++++++++++')

    print(ps1)
    print('----------------------------------')
    print(ps2)
    print('++++++++++++++++++++++')

    print(net1.prefix)
    print('----------------------------------')
    print(net2.prefix)
    print('++++++++++++++++++++++')

    prefix_length = len(net2.prefix)

    print(ps1.keys())
    print('----------------------------------')
    print(ps2.keys())
    print('++++++++++++++++++++++')

    copy_params(net1, net2)

    print('++++++++++++++++++++++')

    print(net1.collect_params().values())
    print('----------------------------------')
    print(net2.collect_params().values())
    print('++++++++++++++++++++++')

    #
    # print(net1.collect_params().items())
    # print('----------------------------------')
    # print(net2.collect_params().items())
    # print('++++++++++++++++++++++')

    # net2.collect_params().update(net1.collect_params())
    # net2[0].collect_params().update(net1[0].collect_params())

    # print(net1[0].collect_params())
    # print('----------------------------------')
    # print(net2[0].collect_params())
    # print('++++++++++++++++++++++')

    # net2[0].collect_params().update(net1[0].collect_params())

    print(nd.sum(net1[0].weight.data() - net2[0].weight.data()).asnumpy())
    print(nd.sum(net1[1].weight.data() - net2[1].weight.data()).asnumpy())
    print(nd.sum(net1[2].weight.data() - net2[2].weight.data()).asnumpy())
    print(nd.sum(net1[4].weight.data() - net2[4].weight.data()).asnumpy())
    print(nd.sum(net1[5].weight.data() - net2[5].weight.data()).asnumpy())
    print('----------------------------------')

    pass
Example #24
def all(tensor):
    return nd.sum(tensor != 0).asscalar()
Example #25
def softmax(y_linear):
    exp = nd.exp(y_linear - nd.max(y_linear))
    partition = nd.sum(exp, axis=0, exclude=True).reshape((-1, 1))
    return exp / partition
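An illustrative check with arbitrary scores (not from the original code): because the partition sums over every axis except 0, each row of the output should sum to 1.

from mxnet import nd

scores = nd.array([[1.0, 2.0, 3.0],
                   [0.0, 0.0, 0.0]])
probs = softmax(scores)
print(probs)
print(nd.sum(probs, axis=1))   # both entries come out as ~1.0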
Example #26
def test_reduce():
    a = nd.ones(shape=(LARGE_X, 1))
    assert nd.sum(a).asnumpy() == a.shape[0] * a.shape[1]
Example #27
prams = [W, W0]

# Track the gradients of the parameters
for parameter in prams:
    parameter.attach_grad()

# Execute training loop using SGD
for E in range(epochs):
    total_loss = 0
    for i, (xtrain, ytrain) in enumerate(train_data):
        xtrain = xtrain.as_in_context(cntx).reshape((-1, 784))
        ytrain = ytrain.as_in_context(cntx)
        ylabel_flag = nd.one_hot(ytrain, 5)
        with autograd.record():
            y_out = aux.nnet(xtrain, W, W0)
            loss = aux.cross_ent(y_out, ylabel_flag)
        loss.backward()
        prams = aux.SGD(prams, learn_rate)
        total_loss += nd.sum(loss).asscalar()

    # Evaluate model on training data
    train_accuracy = aux.compute_accuracy(train_data, aux.nnet, prams, cntx)

    # Evaluate model on testing data
    test_accuracy = aux.compute_accuracy(test_data, aux.nnet, prams, cntx)
    print("Epoch %s. Loss: %s, Train Accuracy: %s, Test Accuracy: %s" %
          (E, total_loss / m_cases, train_accuracy, test_accuracy))

    # Save trained parameters
    aux.save_mnist(prams)
Example #28
    def train(self, train_data, log_folder, params_folder, epochs, batch_size, ctx, init_lr, lr_step=5, lr_factor=0.1):
        """
        Train network.
        :param train_mode:
        :param train_data: Data and Label for training. Instance of tuple(dict).
        - valid_keys: valid keys of current category of clothes.
        - images: Instance of tuple. All images info of current category:
            - orig_images_id: Instance of list. (image_count)
            - orig_images_shape: Instance of np.array. (image_count, orig_h, orig_w)
            - orig_keypoints: Instance of np.array. (image_count, keypoints_count, 3)
            - norm_images: Instance of np.array. (image_count, 3, h, w)
            - belief_maps: Instance of np.array. (image_count, keypoints_count, h, w)
        - norm_centermap: Instance of np.array. (h, w)
        :param params_folder: Folder holds saved params.
        :param epochs:
        :param batch_size:
        :param ctx: Instance of list.
        :return:
        """
        logging.basicConfig(level=logging.INFO,
                            handlers=[logging.StreamHandler(),
                                      logging.FileHandler(log_folder + 'train_' + self._name + '_batch_' + str(epochs) + '_' + str(batch_size))])

        # 1. check params files and get last epoch and batch
        epoch_index, batch_index, file = self.utils_params_file(batch_size, 'check', params_folder)
        # (1) begin a new training
        if epoch_index == -1 and batch_index == -1:
            logging.info("No params files detected. Begin a new training.")
            self.initialize(mx.init.Xavier(magnitude=2.34), ctx=ctx)
            epoch_index = 0
            batch_index = 0
        # (2) resume training from params file
        else:
            logging.info("Params file '%s' detected. Last (epoch, batch): (%d, %d). Resuming training." % (file, epoch_index, batch_index))
            self.collect_params().load(params_folder + file, ctx=ctx)
            batch_index += 1
        # 2. train
        # (1) trainer and loss function for total training mode
        model_trainer = trainer.Trainer(self.collect_params(), 'sgd',
                                        {'learning_rate': init_lr, 'momentum': 0.9, 'wd': 5e-4})
        loss_function = loss.SoftmaxCrossEntropyLoss(sparse_label=False)
        # (2) train each epoch and batch
        for e in range(epoch_index, epochs):
            if e != epoch_index: batch_index = 0
            # 1> set learning rate
            model_trainer.set_learning_rate(init_lr * pow(lr_factor, int(e / lr_step)))
            if e % lr_step == 0:
                logging.info('Learning rate now is set to be %.6f' % model_trainer.learning_rate)
            # 2> train batch
            while True:
                # (1) get data
                _, _, orig_images_shape_batch, orig_keypoints_batch, norm_images_batch, norm_center_maps_batch, belief_maps_batch, _ = \
                    train_data.get_batch_data(if_data_aug=True, loss_mode='softmax', batch_index=batch_index, batch_size=batch_size)
                if norm_images_batch is None and norm_center_maps_batch is None and belief_maps_batch is None: break
                # (2) split data into multiple GPU
                norm_images_batch_LIST = split_and_load(norm_images_batch, ctx_list=ctx)
                norm_center_maps_batch_LIST = split_and_load(norm_center_maps_batch, ctx_list=ctx)
                belief_maps_batch_LIST = split_and_load(belief_maps_batch, ctx_list=ctx)
                #-------------------------------------------------------------------------------------------------------
                # (3) train total
                pred_beliefMaps_batch = []
                # 1> record auto grad
                with autograd.record():
                    # 1.initiate gpu losses
                    gpu_losses = []
                    # 2.calculate losses on each gpu of each stage
                    for norm_images_batch, norm_center_maps_batch, belief_maps_batch in zip(norm_images_batch_LIST,
                                                                                            norm_center_maps_batch_LIST,
                                                                                            belief_maps_batch_LIST):
                        # (1) initiate current gpu loss
                        current_gpu_loss = None
                        # (2) network forward
                        pred_beliefMaps = self.forward(input_images=norm_images_batch, center_maps=norm_center_maps_batch)
                        for p_b in pred_beliefMaps[-1].asnumpy(): pred_beliefMaps_batch.append(p_b)
                        # (3) shape ground-truth belief maps to use softmax loss
                        shaped_gt_beliefMaps = nd.reshape(belief_maps_batch,
                                                          shape=(belief_maps_batch.shape[0], belief_maps_batch.shape[1],
                                                                 belief_maps_batch.shape[2] * belief_maps_batch.shape[3]))
                        # (4) calculate each and every stage loss on current gpu
                        for stage in range(len(self._block_stage)):
                            # 1> shape predicted belief map of current stage
                            shaped_pred_beliefMap = nd.reshape(pred_beliefMaps[stage],
                                                               shape=(pred_beliefMaps[stage].shape[0],
                                                                      pred_beliefMaps[stage].shape[1],
                                                                      pred_beliefMaps[stage].shape[2] *
                                                                      pred_beliefMaps[stage].shape[3]))
                            # 2> calculate current stage loss on current gpu
                            current_loss = loss_function(shaped_pred_beliefMap, shaped_gt_beliefMaps)
                            # 3> summary
                            current_gpu_loss = current_loss if current_gpu_loss is None else (current_gpu_loss + current_loss)
                        # (5) append & save
                        gpu_losses.append(current_gpu_loss)
                # 3> backward and update
                for gpu_loss in gpu_losses:
                    gpu_loss.backward()
                model_trainer.step(batch_size)
                nd.waitall()
                # 4> calculate batch average loss
                batch_loss = sum([nd.sum(gpu_loss).asscalar() for gpu_loss in gpu_losses]) / (batch_size * len(self._block_stage))
                NE = self.calculate_error(valid_keys=utils.keypoints_order[train_data.category],
                                          category=train_data.category,
                                          predicted_keypoints=self.transform_beliefMaps_into_origKeypoints(
                                              predicted_beliefMaps=np.array(pred_beliefMaps_batch),
                                              orig_images_shape=orig_images_shape_batch),
                                          orig_keypoints=np.array(orig_keypoints_batch))
                # 5> print
                logging.info("Epoch[%d]-Batch[%d] lr: %f. Average loss: %f. NE:%.2f%%" % (e, batch_index, model_trainer.learning_rate, batch_loss, NE*100))
                #-------------------------------------------------------------------------------------------------------
                # (4) save params with batch info (batch_size, batch_index)
                params_file = self.utils_params_file(operation='generate', batch_size=batch_size, epoch_index=e, batch_index=batch_index)
                params_old_file = self.utils_params_file(operation='generate', batch_size=batch_size,
                                                         epoch_index=e, batch_index=batch_index - 1, batches=train_data.calc_batches_count(batch_size))
                self.collect_params().save(params_folder + params_file)
                if os.path.exists(params_folder + params_old_file): os.remove(params_folder + params_old_file)
                batch_index += 1
        # 3.finish
        logging.info("Training completed.")
Example #29
def test_ndarray_ones():
    a = nd.ones(shape=LARGE_X)
    assert a[-1] == 1
    assert nd.sum(a).asnumpy() == LARGE_X
Example #30
def train(train_data, net, loss, ctx, global_step, epoch_step, num_epochs, best_F1=0):
    print("Start training on ", ctx)
      
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    for epoch in range(num_epochs):
        if epoch<50:
            trainer = gluon.Trainer(net.collect_params(),'adam', {'learning_rate': 0.001, 'wd':1e-3})
        elif epoch<90:
            trainer = gluon.Trainer(net.collect_params(),'adam', {'learning_rate': 0.0001, 'wd':1e-3})
        elif epoch<120:
            trainer = gluon.Trainer(net.collect_params(),'adam', {'learning_rate': 0.00001, 'wd':1e-3})
        else:
            trainer = gluon.Trainer(net.collect_params(),'sgd', {'learning_rate': 0.000001,'momentum': 0.9,'wd':1e-3})
        train_loss, n = 0.0, 0.0
        TP, TN, FP, FN = 0, 0, 0, 0
        start = time()
        for i,batch in enumerate(train_data):
            data, label, batch_size = get_batch(batch, ctx)
            losses = []
            with autograd.record():
                outputs = [net(X) for X in data]
                losses = [loss(yhat, y) for yhat, y in zip(outputs, label)]
            
            for l in losses:
                l.backward()
            sw.add_scalar(tag='cross_entropy', value=l.mean().asscalar(), global_step=global_step)
            global_step += 1

            train_loss += sum([l.sum().asscalar() for l in losses])
            n += batch_size
            
            trainer.step(batch_size)
        for data, label in test_data:
            data = data.as_in_context(ctx[0])
            label = label.as_in_context(ctx[0])
            pred = net(data)
            nd.waitall()
            pred = nd.sigmoid(pred)
            pred = (pred > 0.5).reshape(-1, 256, 256)

            TPt = nd.sum(pred * label).asscalar()
            FPt = nd.sum(pred - (pred * label)).asscalar()
            FNt = nd.sum(label - (pred * label)).asscalar()
            TNt = nd.sum((1 - pred) * (1 - label)).asscalar()

            TP = TP + TPt
            FP = FP + FPt
            FN = FN + FNt
            TN = TN + TNt

        ACC = (TP + TN) / (TP + TN + FP + FN + 1e-15)
        TPR = TP / (TP + FN + 1e-15)
        TNR = TN / (FP + TN + 1e-15)
        PPV = TP / (TP + FP + 1e-15)
        F1 = 2 * PPV * TPR / (PPV + TPR + 1e-15)
        
        sw.add_scalar(tag='test_acc', value=ACC, global_step=epoch_step)
        sw.add_scalar(tag='test_TPR', value=TPR, global_step=epoch_step)
        sw.add_scalar(tag='test_TNR', value=TNR, global_step=epoch_step)
        sw.add_scalar(tag='test_PPV', value=PPV, global_step=epoch_step)
        sw.add_scalar(tag='F1', value=F1, global_step=epoch_step)
        epoch_step+=1
        print('EPOCH',epoch)
        print('test_acc=',ACC)
        print('test_TPR=',TPR)
        print('test_TNR=',TNR)
        print('test_PPV=',PPV) 
        print('F1=',F1)  
        if F1>best_F1:
            net.save_parameters('fold2_unet.params')
            best_F1=F1
        if epoch == 0:
            sw.add_graph(net)
            
        print('train_loss=',train_loss/n)
        print('time:',time() - start)
    sw.close()
    net.export("mynet", epoch)
Example #31
def CapLoss(y_pred, y_true):
    L = y_true * nd.square(nd.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * nd.square(nd.maximum(0., y_pred - 0.1))
    return nd.mean(nd.sum(L, 1))
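A hedged illustration of this capsule margin loss with invented numbers: y_pred holds capsule lengths in [0, 1] and y_true is one-hot, so a long capsule for the target class keeps the loss small.

from mxnet import nd

y_pred = nd.array([[0.95, 0.05, 0.30]])    # capsule lengths for 3 classes
y_true = nd.array([[1.0, 0.0, 0.0]])       # one-hot target
print(CapLoss(y_pred, y_true).asscalar())  # ≈ 0.02, dominated by the 0.30 non-target capsule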
Example #32
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': .001})
training_loss_vector = []
validation_loss_vector = []

for e in range(epochs):
    cumulative_loss_train = 0
    cumulative_loss_valid = 0
    for i, (data, label) in enumerate(data_iter_loader_train):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss_train = softmax_cross_entropy(output, label)
        loss_train.backward()
        trainer.step(data.shape[0])
        cumulative_loss_train += nd.sum(loss_train).asscalar()
    for j, (data, label) in enumerate(data_iter_loader_valid):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss_valid = softmax_cross_entropy(output, label)
        cumulative_loss_valid += nd.sum(loss_valid).asscalar()
        print(cumulative_loss_train / 42000, '   ***   ', '***',
              cumulative_loss_valid / 18000)
        validation_accuracy = evaluate_accuracy(data_iter_loader_valid, net)
        train_accuracy = evaluate_accuracy(data_iter_loader_train, net)
        print("Epoch %s , train_acc %s, validation_acc %s" %
              (e, train_accuracy, validation_accuracy))
    training_loss_vector.append(cumulative_loss_train)
    validation_loss_vector.append(cumulative_loss_valid)
Example #33
    def unlabeled_train_op_adv_combine_add(self, update_enc=True):
        '''
        Trains the GAN model
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx
        eps = 1e-10
        ##########################
        ### unsupervised phase ###
        ##########################
        # Retrieve data
        docs = self.data.get_documents(key='train')

        class_true = nd.zeros(batch_size, dtype='int32', ctx=model_ctx)
        class_fake = nd.ones(batch_size, dtype='int32', ctx=model_ctx)
        loss_reconstruction = nd.zeros((1, ), ctx=model_ctx)

        ### adversarial phase ###
        discriminator_z_confidence_true = nd.zeros(shape=(1, ), ctx=model_ctx)
        discriminator_z_confidence_fake = nd.zeros(shape=(1, ), ctx=model_ctx)
        discriminator_y_confidence_true = nd.zeros(shape=(1, ), ctx=model_ctx)
        discriminator_y_confidence_fake = nd.zeros(shape=(1, ), ctx=model_ctx)
        loss_discriminator = nd.zeros(shape=(1, ), ctx=model_ctx)
        dirich_entropy = nd.zeros(shape=(1, ), ctx=model_ctx)

        ### generator phase ###
        loss_generator = nd.zeros(shape=(1, ), ctx=model_ctx)

        ### reconstruction phase ###
        with autograd.record():
            y_u = self.Enc(docs)
            y_onehot_u_softmax = nd.softmax(y_u)
            x_reconstruction_u = self.Dec(y_onehot_u_softmax)

            logits = nd.log_softmax(x_reconstruction_u)
            loss_reconstruction = nd.sum(-docs * logits, axis=1)
            loss_total = loss_reconstruction * self.args['recon_alpha']

            if self.args['adverse']:  #and np.random.rand()<0.8:
                y_true = np.random.dirichlet(np.ones(self.ndim_y) *
                                             self.args['dirich_alpha'],
                                             size=batch_size)
                y_true = nd.array(y_true, ctx=model_ctx)
                dy_true = self.Dis_y(y_true)
                dy_fake = self.Dis_y(y_onehot_u_softmax)
                discriminator_y_confidence_true = nd.mean(
                    nd.softmax(dy_true)[:, 0])
                discriminator_y_confidence_fake = nd.mean(
                    nd.softmax(dy_fake)[:, 1])
                softmaxCEL = gluon.loss.SoftmaxCrossEntropyLoss()
                loss_discriminator = softmaxCEL(dy_true, class_true) + \
                                       softmaxCEL(dy_fake, class_fake)
                loss_generator = softmaxCEL(dy_fake, class_true)
                loss_total = loss_total + loss_discriminator + loss_generator
                dirich_entropy = nd.mean(
                    nd.sum(-y_true * nd.log(y_true + eps), axis=1))

        loss_total.backward()

        self.optimizer_enc.step(batch_size)
        self.optimizer_dec.step(batch_size)
        self.optimizer_dis_y.step(batch_size)

        latent_max = nd.zeros(self.args['ndim_y'], ctx=model_ctx)
        for max_ind in nd.argmax(y_onehot_u_softmax, axis=1):
            latent_max[max_ind] += 1.0
        latent_max /= batch_size
        latent_entropy = nd.mean(
            nd.sum(-y_onehot_u_softmax * nd.log(y_onehot_u_softmax + eps),
                   axis=1))
        latent_v = nd.mean(y_onehot_u_softmax, axis=0)

        return nd.mean(loss_discriminator).asscalar(), nd.mean(loss_generator).asscalar(), nd.mean(loss_reconstruction).asscalar(), \
               nd.mean(discriminator_z_confidence_true).asscalar(), nd.mean(discriminator_z_confidence_fake).asscalar(), \
               nd.mean(discriminator_y_confidence_true).asscalar(), nd.mean(discriminator_y_confidence_fake).asscalar(), \
               latent_max.asnumpy(), latent_entropy.asscalar(), latent_v.asnumpy(), dirich_entropy.asscalar()
Example #34
def softmax(X):
    X_max = nd.max(X, axis=1, keepdims=True)
    X = X - X_max
    exp = nd.exp(X)
    partition = nd.sum(exp, axis=1, keepdims=True)
    return exp / partition
Example #35
    def unlabeled_train_op_mmd_combine(self, update_enc=True):
        '''
        Trains the MMD model
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx
        eps = 1e-10

        # Retrieve data
        docs = self.data.get_documents(key='train')
        if self.args['use_kd']:
            split_on = docs.shape[1] // 2
            docs, bert_logits = docs[:, :split_on], docs[:, split_on:]
            t = self.args['kd_softmax_temp']
            kd_docs = nd.softmax(bert_logits / t) * nd.sum(
                docs, axis=1, keepdims=True)
            kd_docs = kd_docs * (kd_docs > self.args['kd_min_count'])

        y_true = np.random.dirichlet(np.ones(self.ndim_y) *
                                     self.args['dirich_alpha'],
                                     size=batch_size)
        y_true = nd.array(y_true, ctx=model_ctx)

        with autograd.record():
            ### reconstruction phase ###
            y_onehot_u = self.Enc(docs)
            y_onehot_u_softmax = nd.softmax(y_onehot_u)
            if self.args['latent_noise'] > 0:
                y_noise = np.random.dirichlet(np.ones(self.ndim_y) *
                                              self.args['dirich_alpha'],
                                              size=batch_size)
                y_noise = nd.array(y_noise, ctx=model_ctx)
                y_onehot_u_softmax = (
                    1 - self.args['latent_noise']
                ) * y_onehot_u_softmax + self.args['latent_noise'] * y_noise
            x_reconstruction_u = self.Dec(y_onehot_u_softmax)

            if self.args['use_kd']:
                kd_logits = nd.log_softmax(x_reconstruction_u / t)
                logits = nd.log_softmax(x_reconstruction_u)

                kd_loss_reconstruction = nd.mean(
                    nd.sum(-kd_docs * kd_logits, axis=1))
                loss_reconstruction = nd.mean(nd.sum(-docs * logits, axis=1))

                loss_total = self.args['recon_alpha'] * (
                    self.args['kd_loss_alpha'] * t * t *
                    (kd_loss_reconstruction) +
                    (1 - self.args['kd_loss_alpha']) * loss_reconstruction)
            else:
                logits = nd.log_softmax(x_reconstruction_u)
                loss_reconstruction = nd.mean(nd.sum(-docs * logits, axis=1))
                loss_total = loss_reconstruction * self.args['recon_alpha']

            ### mmd phase ###
            if self.args['adverse']:
                y_fake = self.Enc(docs)
                y_fake = nd.softmax(y_fake)
                loss_mmd = mmd_loss(y_true,
                                    y_fake,
                                    ctx_model=model_ctx,
                                    t=self.args['kernel_alpha'])
                loss_total = loss_total + loss_mmd

            if self.args['l2_alpha'] > 0:
                loss_total = loss_total + self.args['l2_alpha'] * nd.mean(
                    nd.sum(nd.square(y_onehot_u), axis=1))

            loss_total.backward()

        self.optimizer_enc.step(1)
        self.optimizer_dec.step(1)  # self.m.args['batch_size']

        latent_max = nd.zeros(self.args['ndim_y'], ctx=model_ctx)
        for max_ind in nd.argmax(y_onehot_u, axis=1):
            latent_max[max_ind] += 1.0
        latent_max /= batch_size
        latent_entropy = nd.mean(
            nd.sum(-y_onehot_u_softmax * nd.log(y_onehot_u_softmax + eps),
                   axis=1))
        latent_v = nd.mean(y_onehot_u_softmax, axis=0)
        dirich_entropy = nd.mean(nd.sum(-y_true * nd.log(y_true + eps),
                                        axis=1))

        if self.args['adverse']:
            loss_mmd_return = loss_mmd.asscalar()
        else:
            loss_mmd_return = 0.0
        return nd.mean(loss_reconstruction).asscalar(
        ), loss_mmd_return, latent_max.asnumpy(), latent_entropy.asscalar(
        ), latent_v.asnumpy(), dirich_entropy.asscalar()
Example #36
        self.linear1 = nn.Dense(in_units=confidence_C,units=(confidence_C+K_way)//2,\
                                use_bias=True,activation='relu')
        self.linear2 = nn.Dense(units=K_way)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        return x  # shape (N, K_way); the top_k entries give the predicted labels, trained with softmax cross-entropy


if __name__ == '__main__':
    from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss
    from mxnet import nd, autograd
    model = Decision_thresh(thresh_size=4)
    model.initialize(init=mx.init.Xavier())
    x = nd.array([[0.1, 0.7, 0.9, 0.4], [0.8, 0.5, 0.8, 0.1]])
    label = nd.array([[0, 1, 1, 0], [1, 0, 0, 0]])
    loss_criterion = SigmoidBinaryCrossEntropyLoss()
    with autograd.record():
        y_pred = model(x)
        loss = loss_criterion(y_pred, label)
        print("loss", nd.sum(loss).asscalar())
        loss.backward()
        print(model.thresh.grad())

    # test that the Decision_topk model predicts the ground-truth top_k

    model2 = Decision_topk(confidence_C=63, K_way=4)
    model2.initialize(init=mx.init.Xavier())
    #x = nd.
Example #37
def squash(x, axis):

    s_squared_norm = nd.sum(nd.square(x), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / nd.sqrt(s_squared_norm +
                                                            1e-5)
    return scale * x
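A quick hedged check (made-up input): squash keeps each vector's direction but rescales its length into [0, 1), which is what the printed norms show.

from mxnet import nd

x = nd.array([[0.5, 0.5, 0.5],
              [4.0, 3.0, 0.0]])
v = squash(x, axis=1)
print(nd.sqrt(nd.sum(nd.square(v), axis=1)))   # ≈ [0.43, 0.96], both below 1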
Example #38
    tel = []
    for epoch in range(500):
        total_L = 0.0
        hidden = rnn.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=mx.cpu())

        for data, label in dataLoader.dataIter(batch_size):
            label = nd.array(label)
            # print("label shape", label.shape)
            #label = nd.ones(shape=(5,32)) * label
            #label = label.reshape((-1,))
            dd = nd.array(data.reshape((batch_size, 5, 11)).swapaxes(0, 1))
            hidden = detach(hidden)
            with mx.autograd.record():
                output, hidden = rnn(dd, hidden)
                output = output.reshape((5, 256, 1))
                output = nd.sum(output, axis=0) / 5
                # print(output.shape)
                lv = loss(output, label)
            lv.backward()
            grads = [i.grad() for i in rnn.collect_params().values()]
            mx.gluon.utils.clip_global_norm(grads, clipping_norm * num_steps * batch_size)
            trainer.step(batch_size)
            total_L += mx.nd.sum(lv).asscalar()
        test_loss = evals(rnn,c,d,batch_size)
        trl.append(total_L/len(a))
        tel.append(test_loss)
        print("Epoch %d loss %.4f test loss %.4f train acc %.4f test acc %.4f" %(epoch, total_L/len(a), test_loss,predict(rnn,a,b),predict(rnn,c,d)))
    with open("rnn.csv",'w',newline='') as f:
        import csv
        writer = csv.writer(f)
        writer.writerows([trl,tel])
Example #39
def test_reduce():
    a = nd.ones(shape=(LARGE_X, SMALL_Y)) 
    assert nd.sum(a).asnumpy() == a.shape[0] * a.shape[1]
Example #40
def softmax(y_linear, temperature=1.0):
    lin = (y_linear - nd.max(y_linear)) / temperature
    exp = nd.exp(lin)
    partition = nd.sum(exp, axis=0, exclude=True).reshape((-1, 1))
    return exp / partition
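A short hedged illustration with arbitrary scores: raising the temperature flattens the distribution, lowering it sharpens the peak.

from mxnet import nd

scores = nd.array([[1.0, 2.0, 3.0]])
print(softmax(scores, temperature=1.0))    # ≈ [0.09, 0.24, 0.67]
print(softmax(scores, temperature=10.0))   # much closer to uniform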
Example #41
def check_ndarray_ones():
    a = nd.ones(shape=LARGE_X)
    assert a[-1] == 1
    assert nd.sum(a) == LARGE_X
Example #42
    def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
        """Run decoding

        Parameters
        ----------
        word_inputs : mxnet.ndarray.NDArray
            word indices of seq_len x batch_size
        tag_inputs : mxnet.ndarray.NDArray
            tag indices of seq_len x batch_size
        arc_targets : mxnet.ndarray.NDArray
            gold arc indices of seq_len x batch_size
        rel_targets : mxnet.ndarray.NDArray
            gold rel indices of seq_len x batch_size
        Returns
        -------
        tuple
            (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training; if gold targets are given outside
            training, (arc_accuracy, rel_accuracy, overall_accuracy, outputs); otherwise just outputs, where
            outputs is a list of (arcs, rels).
        """
        is_train = autograd.is_training()

        def flatten_numpy(ndarray):
            """Flatten nd-array to 1-d column vector

            Parameters
            ----------
            ndarray : numpy.ndarray
                input tensor

            Returns
            -------
            numpy.ndarray
                A column vector

            """
            return np.reshape(ndarray, (-1,), 'F')

        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
        num_tokens = int(np.sum(mask))  # non padding, non root token number

        if is_train or arc_targets is not None:
            mask_1D = flatten_numpy(mask)
            mask_1D_tensor = nd.array(mask_1D)

        unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
        word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
        if self.pret_word_embs:
            word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
        tag_embs = self.tag_embs(nd.array(tag_inputs))

        # Dropout
        emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

        top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                           dropout_x=self.dropout_lstm_input if is_train else 0)
        top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

        W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
        W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
        dep, head = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep), leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
        dep, head = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp), nd.Dropout(data=head, axes=[0],
                                                                                       p=self.dropout_mlp)
        dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
        dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
        head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

        W_arc = self.arc_W.data()
        arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                              bias_x=True, bias_y=False)
        # (#head x #dep) x batch_size

        flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
        # (#head ) x (#dep x batch_size)

        arc_preds = arc_logits.argmax(0)
        # seq_len x batch_size

        if is_train or arc_targets is not None:
            correct = np.equal(arc_preds.asnumpy(), arc_targets)
            arc_correct = correct.astype(np.float32) * mask
            arc_accuracy = np.sum(arc_correct) / num_tokens
            targets_1D = flatten_numpy(arc_targets)
            losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
            arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            arc_probs = np.transpose(
                np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

        W_rel = self.rel_W.data()
        rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                              num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
        # (#head x rel_size x #dep) x batch_size

        flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
        # (#head x rel_size) x (#dep x batch_size)

        _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
            seq_len * batch_size, 1)
        _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

        partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
        # (rel_size) x (#dep x batch_size)

        if is_train or arc_targets is not None:
            rel_preds = partial_rel_logits.argmax(0)
            targets_1D = flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
            rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                                (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

        if is_train or arc_targets is not None:
            loss = arc_loss + rel_loss
            correct = rel_correct * flatten_numpy(arc_correct)
            overall_accuracy = np.sum(correct) / num_tokens

        if is_train:
            return arc_accuracy, rel_accuracy, overall_accuracy, loss

        outputs = []

        for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
            # parse sentences one by one
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            arc_pred = arc_argmax(arc_prob, sent_len, msk)
            rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
            rel_pred = rel_argmax(rel_prob, sent_len)
            outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

        if arc_targets is not None:
            return arc_accuracy, rel_accuracy, overall_accuracy, outputs
        return outputs
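
The forward pass above leans on two small helpers that are not shown in this excerpt: reshape_fortran, a column-major (Fortran-order) reshape for NDArrays, and flatten_numpy, its 1-D NumPy counterpart. A minimal sketch of what such helpers could look like, assuming only the shape semantics used above:

import numpy as np
from mxnet import nd

def reshape_fortran(tensor, shape):
    # Column-major reshape via the transpose trick: reverse all axes,
    # reshape to the reversed target shape in C order, then reverse back;
    # this matches numpy.reshape(..., order='F').
    return tensor.T.reshape(tuple(reversed(shape))).T

def flatten_numpy(array):
    # Flatten a NumPy array in column-major order,
    # e.g. (seq_len, batch_size) -> (seq_len * batch_size,)
    return np.reshape(array, (-1,), 'F')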
示例#43
0
File: gru.py Project: HaoranYi/gitProj
def softmax(y_linear, temperature=1.0):
    lin = (y_linear-nd.max(y_linear)) / temperature
    exp = nd.exp(lin)
    partition = nd.sum(exp, axis=0, exclude=True).reshape((-1,1))
    return exp / partition
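
A quick sanity check for the softmax above (a hypothetical usage sketch, not part of the original gru.py): each row of the result should sum to one for any positive temperature.

from mxnet import nd

logits = nd.array([[2.0, 1.0, 0.1],
                   [0.5, 0.5, 0.5]])
probs = softmax(logits, temperature=1.0)
print(nd.sum(probs, axis=1))  # both entries should be (close to) 1.0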
示例#44
0
def get_distance_matrix(x):
    """Get distance matrix given a matrix. Used in testing."""
    square = nd.sum(x ** 2.0, axis=1, keepdims=True)
    distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
    return nd.sqrt(distance_square)
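
As a hypothetical check, two rows at (0, 0) and (3, 4) should be distance 5 apart and distance 0 from themselves:

from mxnet import nd

x = nd.array([[0.0, 0.0],
              [3.0, 4.0]])
print(get_distance_matrix(x))  # roughly [[0, 5], [5, 0]]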
示例#45
0
def evaluate_accuracy(data_iterator, net, num_inputs, num_outputs):
    total_preds = nd.zeros(num_outputs)
    total_labels = nd.zeros(num_outputs)
    total_overlap = nd.zeros(num_outputs)

    numerator = 0.
    denominator = 0.
    for i, (data, label) in enumerate(data_iterator):

        if isMnist:
            data = data.as_in_context(ctx).reshape((-1, num_inputs))
            label = label.as_in_context(ctx)
            output = net(data)
            predictions = nd.argmax(output, axis=1)
            number_same = nd.sum(predictions == label)
            numerator += number_same
            denominator += data.shape[0]
        else:
            data = data.as_in_context(ctx).astype(numpy.float32)
            label = label.as_in_context(ctx).astype(numpy.float32)
            output = net(data)

            soft_output = softmax(output)
            single_prediction_index = nd.argmax(soft_output, axis=1)
            single_label_index = nd.argmax(label, axis=1)
            number_same = nd.sum(single_prediction_index == single_label_index)

            numerator += number_same
            denominator += data.shape[0]

            # Per-class counts for precision/recall: predicted, gold, and overlapping tokens
            for l in nd.arange(0, num_outputs):
                pp = (single_prediction_index == l)
                preds = nd.sum(pp)
                ll = (single_label_index == l)
                labels = nd.sum(ll)
                overlap = nd.sum((pp + ll) > 1)

                total_preds[l] += preds
                total_labels[l] += labels
                total_overlap[l] += overlap

    for l in nd.arange(0, num_outputs):
        p = (total_overlap[l] / total_preds[l]).asscalar()   # precision: correct / predicted
        r = (total_overlap[l] / total_labels[l]).asscalar()  # recall: correct / gold
        f1 = 2 * (p * r) / (p + r)
        print('f1: %s, class %s, preds: %s, labels: %s, overlap: %s .. precision: %s, recall: %s'
              % (f1,
                 l.asscalar(),
                 total_preds[l].asscalar(),
                 total_labels[l].asscalar(),
                 total_overlap[l].asscalar(),
                 p,
                 r))

    return (numerator / denominator).asscalar()
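
The per-class loop at the end implements the usual precision/recall/F1 definitions. As a standalone sketch (names hypothetical, not from the original function):

def f1_score(overlap, preds, labels):
    # precision: fraction of predicted items that are correct
    precision = overlap / preds
    # recall: fraction of gold items that are recovered
    recall = overlap / labels
    return 2 * precision * recall / (precision + recall)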
示例#46
0
def test_sum():
    a = nd.ones(LARGE_X)
    b = nd.sum(a, axis=0)
    assert b[0] == LARGE_X
示例#47
0
def accuracy(output, label, batch_size):
    out = nd.argmax(output, axis=1)
    res = nd.sum(nd.equal(out.reshape((-1, 1)), label)) / batch_size
    return res
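
Because of the reshape((-1, 1)), this version assumes label already has shape (batch_size, 1); a hypothetical call:

from mxnet import nd

output = nd.array([[0.1, 0.9],
                   [0.8, 0.2]])
label = nd.array([[1], [1]])
print(accuracy(output, label, batch_size=2))  # 0.5: only the first prediction matches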
示例#48
0
def analy_model(mask=None, model=None, kernel_size=(1, 3), show=False):
    '''
    Build a curve of the number of parameters per kernel,
    extract the top-3 parameters, get the related mask,
    and obtain the right key order.
    '''
    from layers.dy_conv import new_conv
    from units import init_sphere
    from layers.sphere_net import SphereNet20
    if mask is None:
        mask = "/home/ldc/PycharmProjects/Dy/log_4dy_Ns3/global.param"
    if model is None:
        model = 'log_4dy_Ns3/spherenet_ft_Ns.model'
    ctx = mxnet.cpu()

    mnet = SphereNet20(my_fun=new_conv, use_bn=False)
    # gammas = init_sphere(mnet, model, ctx)
    # paramers = nd.load(model)
    netMask = {}
    if os.path.exists(mask):
        with open(mask, 'rb') as f:  # pickle needs binary mode
            sv = pickle.load(f)
            for k, v in sv.netMask.items():
                netMask[k] = v.as_in_context(ctx)
    all = 0
    static = {}
    paramers = {}
    cal_mask = {}
    loaded = nd.load(model)
    k = loaded.keys()
    keyorder = mnet.collect_params().keys()
    loaded_key = rearrange(target_key=keyorder, needfix_key=k, show=show)
    for idx_key, key in enumerate(keyorder):
        t_k = loaded_key[idx_key]
        value = loaded[t_k]
        if not ('conv' in key and 'weight' in key):
            paramers[key] = value
            continue
        size = value.shape
        output, input = size[:2]
        # name = 'spherenet200_' + '_'.join(key.split('.')[2:])
        ISname = key in netMask.keys()
        masked = netMask[key]
        # print key, ':',
        masked = masked.reshape(size[:2] + (-1, ))
        masked = nd.sum(masked, axis=-1)
        static[key] = nd.zeros(output)
        static[key + '_minus'] = nd.zeros(output)
        if all < output: all = output
        for i in range(output):
            static[key][i] = nd.sum(masked[i] > 3) / input
            static[key + '_minus'][i] = nd.sum(masked[i] < 3) / input
        static[key] = static[key].sort()
        static[key + '_minus'] = static[key + '_minus'].sort()
        # --------------------deal with paramers in net----------------
        N, C, K1, K2 = value.shape
        value_trans = value.reshape(N, C, -1)
        tops_mask = nd.topk(nd.abs(value_trans), k=3, ret_typ='mask')
        tops_idx = nd.topk(tops_mask, k=3, ret_typ='indices')
        value_trans = value_trans.reshape(-1, K1 * K2)
        cal_mask[key] = tops_idx.asnumpy().astype(int)
        tops_idx = tops_idx.reshape(-1, 3)

        out = []
        for x in range(3):
            out.append(value_trans[range(N * C), tops_idx[:, x]])
        paramers[key] = nd.stack(*out).transpose().reshape((N, C) +
                                                           kernel_size)
        print(key)
    print('analysis loop stop')
    if show:
        from mxboard import SummaryWriter
        sw = SummaryWriter(logdir='sphere_dynamic', flush_secs=20000)
        for j in range(all):
            for k, v in static.items():
                if j >= v.shape[0]:
                    continue
                sw.add_scalar(tag=k, value=v[j].asscalar(), global_step=j)

    return paramers, cal_mask, keyorder
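
The top-3 extraction above relies on nd.topk's mask and indices return types; a small standalone illustration of that pattern (toy values, not from the original model):

from mxnet import nd

v = nd.array([[0.1, -0.7, 0.3, 0.05, 0.9]])
mask = nd.topk(nd.abs(v), k=3, ret_typ='mask')   # ones at the 3 largest magnitudes
idx = nd.topk(mask, k=3, ret_typ='indices')      # column indices of those ones
print(mask)
print(idx)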
示例#49
0
File: utils.py Project: z01nl1o02/tests
def accuracy(output, label):
    return nd.sum(output.argmax(axis=1)==label).asscalar()
示例#50
0
 def predict(self, x):
     h = self.e(x[:, 0])
     r = self.r(x[:, 2])
     t = self.e(x[:, 1])
     score = h + r - t
     return nd.sum(score**2, axis=1, keepdims=True)**0.5
示例#51
0
def cross_entropy(yhat, y):
    return -nd.mean(nd.sum(y * nd.log(yhat), axis=0, exclude=True))
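
A quick hypothetical check with one-hot targets; the result should be the mean of -log of the probability assigned to each true class:

from mxnet import nd

yhat = nd.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]])
y = nd.array([[1, 0, 0],
              [0, 1, 0]])
print(cross_entropy(yhat, y))  # mean of -log(0.7) and -log(0.8), about 0.29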
示例#52
0
 def neglogp(action, mean, logstd):
     assert (mean.shape[-1] == logstd.shape[-1])
     std = nd.exp(logstd) + 1e-8
     return 0.5 * nd.sum(nd.square((action - mean) / std), axis=-1) \
         + 0.5 * np.log(2.0 * np.pi) * action.shape[-1] \
         + nd.sum(logstd, axis=-1)
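
The expression is the negative log-density of a diagonal Gaussian. A hypothetical one-dimensional sanity check against the closed form, assuming neglogp is callable as a plain function (it appears here as a static helper):

from mxnet import nd

action = nd.array([[0.5]])
mean = nd.array([[0.0]])
logstd = nd.array([[0.0]])   # std = exp(0) = 1
print(neglogp(action, mean, logstd))
# expected: 0.5 * 0.25 + 0.5 * log(2 * pi), about 1.044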