Example #1
    def __init__(self, dataset, ctx, labels=None, shape=None, label_shape=None, *args, **kwargs):
        super().__init__(*args, **kwargs)

        if labels is not None:
            llen = 0
            for cond in labels:
                llen += (dataset._label == cond).sum()
            self._length = llen
        else:
            self._length = len(dataset)

        if shape is None:
            shape = dataset._data.shape[1:]
        if label_shape is None:
            label_shape = dataset._label.shape[1:]

        self._data = nd.zeros([self._length] + list(shape), dtype='float32', ctx=ctx)
        self._label = nd.zeros([self._length] + list(label_shape), dtype='int32', ctx=ctx)

        uniques = set()
        i = 0
        for dat, dlab in dataset:
            lab = dlab.item()
            if labels is None or np.any([lab == cond for cond in labels]):
                self._data[i] = dat
                self._label[i] = lab
                i += 1
                uniques.add(lab)
        self.classes = list(uniques)
Example #2
def sample(prefix, num_chars, temperature=1.0):
    #####################################
    # Initialize the string that we'll return to the supplied prefix
    #####################################
    string = prefix

    #####################################
    # Prepare the prefix as a sequence of one-hots for ingestion by RNN
    #####################################
    prefix_numerical = [character_dict[char] for char in prefix]
    input = one_hots(prefix_numerical)

    #####################################
    # Set the initial state of the hidden representation ($h_0$) to the zero vector
    #####################################
    h = nd.zeros(shape=(1, num_hidden), ctx=ctx)
    c = nd.zeros(shape=(1, num_hidden), ctx=ctx)

    #####################################
    # For num_chars iterations,
    #     1) feed in the current input
    #     2) sample next character from the output distribution
    #     3) add sampled character to the decoded string
    #     4) prepare the sampled character as a one_hot (to be the next input)
    #####################################
    for i in range(num_chars):
        outputs, h, c = lstm_rnn(input, h, c, temperature=temperature)
        choice = np.random.choice(vocab_size, p=outputs[-1][0].asnumpy())
        string += character_list[choice]
        input = one_hots([choice])
    return string
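
The lstm_rnn call above is assumed to apply the temperature inside its softmax; a minimal sketch of that standard scaling, using made-up logits that are not taken from this example:

from mxnet import nd

logits = nd.array([2.0, 1.0, 0.1])            # hypothetical raw scores for three characters
for temperature in (0.5, 1.0, 2.0):
    probs = nd.softmax(logits / temperature)  # divide logits by the temperature before normalizing
    print(temperature, probs.asnumpy())
# Low temperature sharpens the distribution (greedier sampling); high temperature flattens it.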
Example #3
def get_parameters():
    # parameters for INPUT gate
    W_xi = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hi = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_i = nd.zeros(shape=config.hidden_dim)

    # parameters for FORGET gate
    W_xf = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hf = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_f = nd.zeros(shape=config.hidden_dim)

    # parameters for OUTPUT gate
    W_xo = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_ho = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_o = nd.zeros(shape=config.hidden_dim)

    # parameters for memory cell
    W_xc = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hc = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_c = nd.zeros(shape=config.hidden_dim)

    # output layer
    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(shape=config.output_dim)

    parameters = [W_xi, W_hi, b_i,
                  W_xf, W_hf, b_f,
                  W_xo, W_ho, b_o,
                  W_xc, W_hc, b_c,
                  W_hy, b_y]

    for parameter in parameters:
        parameter.attach_grad()

    return parameters
Example #4
def get_parameters():
    # parameters for UPDATE gate
    W_xz = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hz = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_z = nd.zeros(shape=config.hidden_dim)

    # parameters for RESET gate
    W_xr = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hr = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_r = nd.zeros(shape=config.hidden_dim)

    # parameters for candidate hidden state
    W_xh = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hh = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_h = nd.zeros(shape=config.hidden_dim)

    # output layer
    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(shape=config.output_dim)

    parameters = [W_xz, W_hz, b_z,
                  W_xr, W_hr, b_r,
                  W_xh, W_hh, b_h,
                  W_hy, b_y]

    for parameter in parameters:
        parameter.attach_grad()

    return parameters
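
For reference, here is a minimal single-step GRU forward pass wired to parameters shaped as above; the dimensions are made up and the update follows the standard GRU equations, it is not code taken from this repository:

from mxnet import nd

input_dim, hidden_dim, output_dim, std = 5, 8, 3, 0.01

def _init(shape):
    return nd.random_normal(scale=std, shape=shape)

W_xz, W_hz, b_z = _init((input_dim, hidden_dim)), _init((hidden_dim, hidden_dim)), nd.zeros(hidden_dim)
W_xr, W_hr, b_r = _init((input_dim, hidden_dim)), _init((hidden_dim, hidden_dim)), nd.zeros(hidden_dim)
W_xh, W_hh, b_h = _init((input_dim, hidden_dim)), _init((hidden_dim, hidden_dim)), nd.zeros(hidden_dim)
W_hy, b_y = _init((hidden_dim, output_dim)), nd.zeros(output_dim)

X = nd.random_normal(shape=(2, input_dim))   # a batch of 2 inputs
H = nd.zeros(shape=(2, hidden_dim))          # initial hidden state

Z = nd.sigmoid(nd.dot(X, W_xz) + nd.dot(H, W_hz) + b_z)         # UPDATE gate
R = nd.sigmoid(nd.dot(X, W_xr) + nd.dot(H, W_hr) + b_r)         # RESET gate
H_tilde = nd.tanh(nd.dot(X, W_xh) + nd.dot(R * H, W_hh) + b_h)  # candidate hidden state
H = Z * H + (1 - Z) * H_tilde                                   # new hidden state
Y = nd.dot(H, W_hy) + b_y                                       # output layer
print(Y.shape)                                                  # (2, 3)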
    def forward(self,X,lrp_aware=False):
        '''
        Realizes the forward pass of an input through the convolution layer.

        Parameters
        ----------
        X :         mxnet.ndarray.ndarray.NDArray
                    a network input, shaped (N,H,W,D), with
                    N = batch size
                    H, W, D = input size in height, width, depth

        lrp_aware : bool
                    controls whether the forward pass is to be computed with awareness for multiple following
                    LRP calls. This will sacrifice speed in the forward pass but will save time if multiple LRP
                    calls follow for the current X, e.g. with different parameter settings or for multiple
                    target classes.

        Returns
        -------
        Y :         mxnet.ndarray.ndarray.NDArray
                    the layer outputs.
        '''

        self.lrp_aware = lrp_aware

        self.X = X
        N,H,W,D = X.shape

        hf, wf, df, nf  = self.W.shape
        hstride, wstride = self.stride
        numfilters = self.n

        #assume the given pooling and stride parameters are carefully chosen.
        Hout = (H - hf) // hstride + 1
        Wout = (W - wf) // wstride + 1


        #initialize pooled output
        self.Y = nd.zeros((N,Hout,Wout,numfilters), ctx=self.ctx, dtype=self.dtype)

        if self.lrp_aware:
            self.Z = nd.zeros((N, Hout, Wout, hf, wf, df, nf), ctx=self.ctx, dtype=self.dtype) #initialize container for precomputed forward messages
            for i in range(Hout):
                for j in range(Wout):
                    self.Z[:,i,j,...] = nd.expand_dims(self.W, axis=0) * nd.expand_dims(self.X[:, i*hstride:i*hstride+hf , j*wstride:j*wstride+wf , :], axis=4) # N, hf, wf, df, nf
                    self.Y[:,i,j,:] = self.Z[:,i,j,...].sum(axis=(1,2,3)) + self.B
        else:
            for i in range(Hout):
                for j in range(Wout):
                    self.Y[:,i,j,:] = nd.sum( nd.expand_dims( X[:, i*hstride:i*hstride+hf: , j*wstride:j*wstride+wf: , : ].transpose((1,2,3,0)), 4) * nd.expand_dims(self.W, 3), axis=(0,1,2))  + self.B

        return self.Y
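
The Hout/Wout computation above is the usual valid-convolution size arithmetic; a worked example with illustrative numbers that are not from this code:

# A 32x32 input with 5x5 filters and stride 2 in both directions:
H, W, hf, wf, hstride, wstride = 32, 32, 5, 5, 2, 2
Hout = (H - hf) // hstride + 1   # (32 - 5) // 2 + 1 = 14
Wout = (W - wf) // wstride + 1   # 14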
Example #6
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()       
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params) 
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                y = Y.T.reshape((-1,))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f"
                  % (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(
                    rnn, prefix, pred_len, params, num_hiddens, vocab_size,
                    ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
Example #7
def test_gluon_embedding():
    m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X)
    m.initialize()
    a = nd.zeros((MEDIUM_X, SMALL_Y))
    b = m(a)
    assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X)
    assert b.asnumpy().size == LARGE_SIZE
Example #8
def train_ch7(trainer_fn, states, hyperparams, features, labels, batch_size=10,
              num_epochs=2):
    """Train a linear regression model."""
    net, loss = linreg, squared_loss
    w, b = nd.random.normal(scale=0.01, shape=(features.shape[1], 1)), nd.zeros(1)
    w.attach_grad()
    b.attach_grad()

    def eval_loss():
        return loss(net(features, w, b), labels).mean().asscalar()

    ls = [eval_loss()]
    data_iter = gdata.DataLoader(
        gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X, w, b), y).mean()
            l.backward()
            trainer_fn([w, b], states, hyperparams)
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    set_figsize()
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')
Example #9
    def __setitem__(self, tokens, new_embedding):
        """Updates embedding vectors for tokens.

        If self.allow_extend is True, vectors for previously unknown tokens can be introduced.

        Parameters
        ----------
        tokens : hashable object or a list or tuple of hashable objects
            A token or a list of tokens whose embedding vectors are to be updated.
        new_embedding : mxnet.ndarray.NDArray
            An NDArray to be assigned to the embedding vectors of `tokens`. Its length must be equal
            to the number of `tokens` and its width must be equal to the dimension of embedding of
            the glossary. If `tokens` is a singleton, it must be 1-D or 2-D. If `tokens` is a list
            of multiple strings, it must be 2-D.
        """
        if self.allow_extend and self._idx_to_vec is None:
            # Initialize self._idx_to_vec
            assert C.UNK_IDX == 0
            self._idx_to_vec = self._init_unknown_vec(shape=(1, new_embedding.shape[-1]))

        tokens = self._check_vector_update(tokens, new_embedding)

        if self.allow_extend:
            # Add new / previously unknown tokens
            for token in filter(lambda t: t not in self._token_to_idx, tokens):
                idx = len(self._token_to_idx)
                self._token_to_idx[token] = idx
                self._idx_to_token.append(token)

            num_extended = len(self._token_to_idx) - self.idx_to_vec.shape[0]
            if num_extended == 1:
                warnings.warn(
                    'When adding new tokens via TokenEmbedding.__setitem__ '
                    'the internal embedding matrix needs to be reallocated. '
                    'Users are therefore encouraged to batch their updates '
                    '(i.e. add multiple new tokens at a time).')

            # Extend shape of idx_to_vec
            idx_to_vec = nd.zeros(shape=(len(self._token_to_idx),
                                         self.idx_to_vec.shape[1]))
            idx_to_vec[:self.idx_to_vec.shape[0]] = self._idx_to_vec
            self._idx_to_vec = idx_to_vec

        indices = []
        for token in tokens:
            if token in self._token_to_idx:
                indices.append(self._token_to_idx[token])
            else:
                if self.unknown_token:
                    raise KeyError(('Token "{}" is unknown. To update the embedding vector for an'
                                    ' unknown token, please explicitly include "{}" as the '
                                    '`unknown_token` in `tokens`. This is to avoid unintended '
                                    'updates.').format(token, self._idx_to_token[C.UNK_IDX]))
                else:
                    raise KeyError(('Token "{}" is unknown. Updating the embedding vector for an '
                                    'unknown token is not allowed because `unknown_token` is not '
                                    'specified.').format(token))

        self._idx_to_vec[nd.array(indices)] = new_embedding
Example #10
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    h, w = K.shape
    Y = nd.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y
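
A quick sanity check of corr2d on a small input (assuming the function above is in scope):

from mxnet import nd

X = nd.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = nd.array([[0, 1], [2, 3]])
print(corr2d(X, K))
# [[19. 25.]
#  [37. 43.]]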
Example #11
 def test_ndarray2numpy(self):
     m = gluon.nn.Embedding(14000, 128)
     m.initialize()
     ind = nd.zeros((700000, 128))
     x = m(ind)
     x.shape
     test = x.asnumpy()
     assert (x.shape == test.shape)
Example #12
def getfake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))  # fake weight vector for separation
    bfake = nd.random_normal(shape=(1))  # fake bias
    wfake = wfake / nd.norm(wfake)  # rescale to unit length

    # making some linearly separable data, simply by choosing the labels accordingly
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))

    i = 0
    while (i < samples):
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) & (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp
            Y[i] = 2 * (margin > 0) - 1
            i += 1
    return X, Y
Example #13
 def transform_mnist(data, label):
     # transform a batch of examples
     if resize:
         n = data.shape[0]
         new_data = nd.zeros((n, resize, resize, data.shape[3]))
         for i in range(n):
             new_data[i] = image.imresize(data[i], resize, resize)
         data = new_data
     # change data from batch x height x width x channel to batch x channel x height x width
     return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
Example #14
 def transform_mnist(data, label):
     # transform a batch of examples
     if resize:  # resize the images
         n = data.shape[0]  # number of samples; n x 784 x 1 -> n x 28 x 28 x 1
         new_data = nd.zeros((n, resize, resize, data.shape[3]))  # data.shape[3] is the number of channels
         for i in range(n):
             new_data[i] = image.imresize(data[i], resize, resize)
         data = new_data
     # change data from batch x height x width x channel to batch 0 x channel 3 x height 1 x width 2
     return nd.transpose(data.astype('float32'), (0,3,1,2))/255, label.astype('float32')
Example #15
def plotscore(w, d):
    xgrid = np.arange(-3, 3, 0.02)
    ygrid = np.arange(-3, 3, 0.02)
    xx, yy = np.meshgrid(xgrid, ygrid)
    zz = nd.zeros(shape=(xgrid.size, ygrid.size, 2))
    zz[:, :, 0] = nd.array(xx)
    zz[:, :, 1] = nd.array(yy)
    vv = nd.dot(zz, w) + b
    CS = plt.contour(xgrid, ygrid, vv.asnumpy())
    plt.clabel(CS, inline=1, fontsize=10)
Example #16
def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char,
                char_to_idx, get_inputs, is_lstm=False):
    """Predict the next chars given the prefix."""
    prefix = prefix.lower()
    state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
    if is_lstm:
        state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
    output = [char_to_idx[prefix[0]]]
    for i in range(num_chars + len(prefix)):
        X = nd.array([output[-1]], ctx=ctx)
        if is_lstm:
            Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params)
        else:
            Y, state_h = rnn(get_inputs(X), state_h, *params)
        if i < len(prefix)-1:
            next_input = char_to_idx[prefix[i+1]]
        else:
            next_input = int(Y[0].argmax(axis=1).asscalar())
        output.append(next_input)
    return ''.join([idx_to_char[i] for i in output])
Example #17
def sample(prefix, num_chars, temperature=1.0):
    string = prefix
    prefix_numerical = [character_dict[char] for char in prefix]
    input = one_hots(prefix_numerical)
    sample_state = nd.zeros(shape=(1, num_hidden), ctx=ctx)
    for i in range(num_chars):
        outputs, sample_state = simple_rnn(input, sample_state, temperature=
                temperature)
        choice = np.random.choice(77, p=outputs[-1][0].asnumpy())
        string += character_list[choice]
        input = one_hots([choice])
    return string
Example #18
 def smooth(label, classes, eta=0.1):
     if isinstance(label, nd.NDArray):
         label = [label]
     smoothed = []
     for l in label:
         ind = l.astype('int')
         res = nd.zeros((ind.shape[0], classes), ctx = l.context)
         res += eta/classes
         res[nd.arange(ind.shape[0], ctx = l.context), ind] = 1 - eta + eta/classes
         smoothed.append(res)
     return smoothed
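
For example, with three classes and eta=0.1, each smoothed row puts 1 - eta + eta/3 ≈ 0.933 on the true class and eta/3 ≈ 0.033 on the others; a minimal check, assuming the smooth function above is in scope:

from mxnet import nd

label = nd.array([0, 2, 1])
print(smooth(label, classes=3)[0])
# [[0.9333 0.0333 0.0333]
#  [0.0333 0.0333 0.9333]
#  [0.0333 0.9333 0.0333]]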
Example #19
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #20
def _build_vocab(data_name, train_dataset, test_dataset):
    all_token = []
    max_len = 0
    for i, line in enumerate(train_dataset):
        train_dataset[i][0] = _clean_str(line[0], data_name)
        line = train_dataset[i][0].split()
        max_len = max_len if max_len > len(line) else len(line)
        all_token.extend(line)
    for i, line in enumerate(test_dataset):
        test_dataset[i][0] = _clean_str(line[0], data_name)
        line = test_dataset[i][0].split()
        max_len = max_len if max_len > len(line) else len(line)
    all_token.extend(line)
    vocab = nlp.Vocab(nlp.data.count_tokens(all_token))
    vocab.set_embedding(nlp.embedding.create('Word2Vec', source='GoogleNews-vectors-negative300'))
    for word in vocab.embedding._idx_to_token:
        if (vocab.embedding[word] == nd.zeros(300)).sum() == 300:
            vocab.embedding[word] = nd.random.normal(-1.0, 1.0, 300)
    vocab.embedding['<unk>'] = nd.zeros(300)
    vocab.embedding['<pad>'] = nd.zeros(300)
    vocab.embedding['<bos>'] = nd.zeros(300)
    vocab.embedding['<eos>'] = nd.zeros(300)
    print('maximum length (in tokens): ', max_len)
    return vocab, max_len
Example #21
    def set_embedding(self, *embeddings):
        """Attaches one or more embeddings to the indexed text tokens.


        Parameters
        ----------
        embeddings : None or tuple of :class:`gluonnlp.embedding.TokenEmbedding` instances
            The embeddings to be attached to the indexed tokens. If multiple embeddings are
            provided, their embedding vectors will be concatenated for the same token.
        """

        if len(embeddings) == 1 and embeddings[0] is None:
            self._embedding = None
            return

        for embs in embeddings:
            assert isinstance(embs, emb.TokenEmbedding), \
                'The argument `embeddings` must be an instance or a list of instances of ' \
                '`gluonnlp.embedding.TokenEmbedding`.'

        assert all([embs.unknown_token for embs in embeddings]) or \
            all([not embs.unknown_token for embs in embeddings]), \
            'Either all or none of the TokenEmbeddings must have an ' \
            'unknown_token set.'

        new_embedding = emb.TokenEmbedding(self.unknown_token, allow_extend=False)
        new_embedding._token_to_idx = self.token_to_idx
        new_embedding._idx_to_token = self.idx_to_token

        new_vec_len = sum(embs.idx_to_vec.shape[1] for embs in embeddings
                          if embs and embs.idx_to_vec is not None)
        new_idx_to_vec = nd.zeros(shape=(len(self), new_vec_len))

        col_start = 0
        # Concatenate all the embedding vectors in embedding.
        for embs in embeddings:
            if embs and embs.idx_to_vec is not None:
                col_end = col_start + embs.idx_to_vec.shape[1]
                # Concatenate vectors of the unknown token.
                new_idx_to_vec[0, col_start:col_end] = embs.idx_to_vec[0]
                new_idx_to_vec[1:, col_start:col_end] = embs[self._idx_to_token[1:]]
                col_start = col_end

        new_embedding._idx_to_vec = new_idx_to_vec
        self._embedding = new_embedding
Example #22
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1, ctx=None):
    """ An implementation of col2im based on fancy indexing and np.add.at """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = nd.zeros((N, C, H_padded, W_padded), dtype=cols.dtype, ctx=ctx)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride, ctx=ctx)
    cols_reshaped = cols.reshape((C * field_height * field_width, -1, N))
    cols_reshaped = cols_reshaped.transpose((2, 0, 1))
    # The for loop is probably a bottleneck, but cannot be avoided without a nd.add.at function
    #for l in nd.arange(cols.shape[1]):
    #    x_padded[:,k,i[:,l], j[:,l]] += cols_reshaped[:,:,l]
    for col in nd.arange(cols.shape[0], ctx=ctx):
        x_padded[:,k[col],i[col,:], j[col,:]] += cols_reshaped[:,col,:]
    #np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]
    def __init__(self, filtersize=(5,5,3,32), stride = (2,2), ctx=mx.cpu(), dtype='float32'):
        '''
        Constructor for a Convolution layer.

        Parameters
        ----------

        filtersize : 4-tuple with values (h,w,d,n), where
            h = filter height
            w = filter width
            d = filter depth
            n = number of filters = number of outputs

        stride : 2-tuple (h,w), where
            h = step size for filter application in vertical direction
            w = step size in horizontal direction

        ctx:    mxnet.context.Context
                device used for all mxnet.ndarray operations

        dtype:  string ('float32' | 'float64')
                dtype used for all mxnet.ndarray operations
                (mxnet default is 'float32', 'float64' supported for easier comparison with numpy)
        '''

        Module.__init__(self)

        self.fh, self.fw, self.fd, self.n = filtersize
        self.stride = stride

        # context sensitive variables
        self.ctx = ctx
        self.W = nd.random.normal(0,1./(self.fh*self.fw*self.fd)**.5, shape=filtersize, ctx=ctx, dtype=dtype)
        self.B = nd.zeros([self.n], ctx=ctx, dtype=dtype)
        self.Y = None
        self.Z = None

        # precision:
        self.dtype = dtype
Example #24
# Deals with only one random variable

import mxnet as mx
from mxnet import nd
import matplotlib
from matplotlib import pyplot as plt

num = 3000

probabilities = nd.ones(6) / 6
rolls = nd.sample_multinomial(probabilities, shape=(num))

counts = nd.zeros((6,num))
totals = nd.zeros(6)

# Counting the number of trials at each step and the total number of rolls
for i, roll in enumerate(rolls):
	totals[ int(roll.asscalar())] += 1
	counts[:, i] = totals

# Generating the probability at each instant by creating an array of 1-n

x = nd.arange(num).reshape((1,num)) + 1
estimates = counts / x
# print(estimates[:, 0])
# print(estimates[:, 1])
# print(estimates[:, num - 1])

# Plotting all of the choices and their probability
plt.plot(estimates[0, :].asnumpy(), label="Estimated P(die=1)")
plt.plot(estimates[1, :].asnumpy(), label="Estimated P(die=2)")
Example #25
from mxnet import nd

# basis of ndarray
# 1. create ndarrays
x = nd.arange(12)
#print("the shape of x: %d" % x.shape)
#print("The size of x: %d" % x.size)
X = x.reshape((3, 4))
Z = nd.zeros((2, 3, 4))
O = nd.ones((3, 4))
Y = nd.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
R = nd.random.normal(0, 1, shape=(3, 4))

# 2. operate ndarrays
Add = X + Y
Mul = X * Y
Div = X / Y
Exp = Y.exp()  # same as np.exp(Y)
Dot = nd.dot(X, Y.T)
CCT_0, CCT_1 = nd.concat(X, Y, dim=0), nd.concat(X, Y, dim=1)
Eq = X == Y
Sum = X.sum()  # same as np.sum(X)
Tran_scal = X.norm().asscalar()

# 3. broadcasting
A = nd.arange(3).reshape((3, 1))
B = nd.arange(2).reshape((1, 2))
broad_add = A + B
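# A (3, 1) array plus a (1, 2) array broadcasts to shape (3, 2): [[0, 1], [1, 2], [2, 3]]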

# 4. index
sub_array = X[1:3]
Example #26
colors = ['blue', 'green', 'red', 'black', 'magenta']

#plt.imshow(nd.ones((n, n, 3)).asnumpy())
anchors = boxes[20, 20, :, :]
for i in range(anchors.shape[0]):
    plt.gca().add_patch(box_to_rect(anchors[i,:]*n, colors[i]))
#plt.show()

from mxnet.gluon import nn
def class_predictor(num_anchors, num_classes):
    """return a layer to predict classes"""
    return nn.Conv2D(num_anchors * (num_classes + 1), 3, padding=1)

cls_pred = class_predictor(5, 10)
cls_pred.initialize()
x = nd.zeros((2, 3, 20, 20))
y = cls_pred(x)
#print y.shape


def box_predictor(num_anchors):
    """return a layer to predict delta locations"""
    return nn.Conv2D(num_anchors * 4, 3, padding=1)

box_pred = box_predictor(10)
box_pred.initialize()
x = nd.zeros((2, 3, 20, 20))
y = box_pred(x)
#print y.shape

def down_sample(num_filters):
Example #27
'''-----------------------------------------------------'''
# Build the model from scratch
# Load the data
import sys
sys.path.append('..')
from utils import load_data_from_mnist

batch_size = 256
train_data, test_data = load_data_from_mnist(batch_size)

# Define the model
import mxnet as mx
from mxnet import nd

try:
	ctx = mx.gpu()
	_ = nd.zeros((1,), ctx=ctx)
except:
	ctx = mx.cpu()

# Define the parameters (LeNet)
weight_scale = .01


#output channels = 20, kernel = (5,5)
W1 = nd.random_normal(shape=(20,1,5,5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)

#output channels = 50, kernel = (3,3)
W2 = nd.random_normal(shape=(50,20,3,3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)
Example #28
n_inputs = 200

true_w = nd.ones(shape=(n_inputs, 1)) * 0.01
true_b = 0.05

features = nd.random.normal(shape=(n_test + n_train, n_inputs))
labels = nd.dot(features, true_w) + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]

# Initialize the model parameters
w = nd.random.normal(scale=0.01, shape=(n_inputs, 1))

b = nd.zeros(shape=(1, ))

w.attach_grad()
b.attach_grad()

# Define the model


def net(X):
    return nd.dot(X, w) + b


# Define the L2-norm penalty


def l2_penalty(w):
def run_epoch(e, network, dataloader, trainer, log_dir, print_name, update_cnn,
              update_metric, save_cnn):
    '''
    Run one epoch to train or test the SSD network
    
    Parameters
    ----------
        
    e: int
        The epoch number

    network: gluon.nn.HybridSequential
        The SSD network

    dataloader: gluon.data.DataLoader
        The train or testing dataloader that is wrapped around the iam_dataset
    
    log_dir: Str
        The directory to store the log files for mxboard

    print_name: Str
        Name to print for associating with the data. Usually this will be "train" or "test"
    
    update_cnn: bool
        Boolean to indicate whether or not the CNN should be updated. Update_cnn should only be set to true for the training data

    save_cnn: bool
        Boolean to indicate whether or not to save the CNN. 

    Returns
    -------

    network: gluon.nn.HybridSequential
        The class predictor network
    '''

    total_losses = [nd.zeros(1, ctx_i) for ctx_i in ctx]
    for i, (X, Y) in enumerate(dataloader):
        X = gluon.utils.split_and_load(X, ctx)
        Y = gluon.utils.split_and_load(Y, ctx)

        with autograd.record():
            losses = []
            for x, y in zip(X, Y):
                default_anchors, class_predictions, box_predictions = network(
                    x)
                box_target, box_mask, cls_target = network.training_targets(
                    default_anchors, class_predictions, y)
                # losses
                loss_class = cls_loss(class_predictions, cls_target)
                loss_box = box_loss(box_predictions, box_target, box_mask)
                # sum all losses
                loss = loss_class + loss_box
                losses.append(loss)

        if update_cnn:
            for loss in losses:
                loss.backward()
            step_size = 0
            for x in X:
                step_size += x.shape[0]
            trainer.step(step_size)

        for index, loss in enumerate(losses):
            total_losses[index] += loss.mean() / len(ctx)

        if update_metric:
            cls_metric.update([cls_target],
                              [nd.transpose(class_predictions, (0, 2, 1))])
            box_metric.update([box_target], [box_predictions * box_mask])

        if i == 0 and e % send_image_every_n == 0 and e > 0:
            cls_probs = nd.SoftmaxActivation(nd.transpose(
                class_predictions, (0, 2, 1)),
                                             mode='channel')
            output_image, number_of_bbs = generate_output_image(
                box_predictions, default_anchors, cls_probs, box_target,
                box_mask, cls_target, x, y)
            print("Number of predicted {} BBs = {}".format(
                print_name, number_of_bbs))
            with SummaryWriter(logdir=log_dir, verbose=False,
                               flush_secs=5) as sw:
                sw.add_image('bb_{}_image'.format(print_name),
                             output_image,
                             global_step=e)

    total_loss = 0
    for loss in total_losses:
        total_loss += loss.asscalar()
    epoch_loss = float(total_loss) / len(dataloader)

    with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
        if update_metric:
            name1, val1 = cls_metric.get()
            name2, val2 = box_metric.get()
            sw.add_scalar(name1, {"test": val1}, global_step=e)
            sw.add_scalar(name2, {"test": val2}, global_step=e)
        sw.add_scalar('loss', {print_name: epoch_loss}, global_step=e)

    if save_cnn and e % save_every_n == 0 and e > 0:
        network.save_parameters("{}/{}".format(checkpoint_dir,
                                               checkpoint_name))
    return epoch_loss
Example #30
 def set_ctx(self):
     try:
         self.__ctx = mx.gpu()
         _ = nd.zeros((1, ), ctx=self.__ctx)
     except:
         self.__ctx = mx.cpu()
Example #31
import mxnet as mx
import numpy as np
import random
import time
from mxnet import nd, init
from mxnet.gluon import data as gdata, nn

dirTrain='D:\\image\\txt\\2l\\'

ctx=mx.gpu()

f=np.loadtxt(dirTrain+"image_train_features.txt",delimiter=' ')
l=np.loadtxt(dirTrain+"image_train_labels.txt",delimiter=' ')


features=nd.array(f).copyto(ctx)
labels=nd.array(l).copyto(ctx)
labels_test=nd.zeros(labels.shape,ctx)

data_num=len(f)
batch_size=500

dataset=gdata.ArrayDataset(features,labels)
data_iter = gdata.DataLoader(dataset,batch_size,shuffle=True)




net = nn.Sequential()
net.add(nn.Dense(100,activation='relu'),
	nn.Dense(100,activation='relu'),
	nn.Dense(3))
net.initialize(init.Uniform(scale=20),ctx=ctx)
Example #32
# 28 * 28 = 784 (each image is 28 x 28, i.e. 784 pixels, so the input layer has 784 units)
num_inputs = 784

# The output layer has size 10 (one unit per class in this image classification problem)
num_outputs = 10

# shape=(num_inputs, num_outputs)
#       784     x   10
# Initialize W from a normal distribution with mean 0 and standard deviation 0.01
# (the exact initial values hardly matter; gradient descent will adjust them later)
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs), ctx=mx.gpu())

# y = w(1)1 * x1 + w(1)2 * x2 + w(1)3 * x3 + ...... + w(1)784 * x784 + b(1)
# Initialize the bias to zeros (length 10: b(1), b(2), b(3), b(4), ..., b(10))
b = nd.zeros(num_outputs, ctx=mx.gpu())

# Attach a gradient buffer to W
W.attach_grad()

# Attach a gradient buffer to b
b.attach_grad()

##----- A quick test of X.sum(axis=1, keepdims=True) -----
X = nd.array([[1, 2, 3], [4, 5, 6]], ctx=mx.gpu())

# Compute values by column
#   1 2 3
#   4 5 6
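
A short sketch of the reduction the comment above sets up (these lines are an illustration, not part of the original example):

print(X.sum(axis=0, keepdims=True))  # [[5. 7. 9.]]   column sums, shape (1, 3)
print(X.sum(axis=1, keepdims=True))  # [[ 6.] [15.]]  row sums, shape (2, 1)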
Example #33
def test(ctx,val_data):
    acc_top1.reset()
    acc_top5.reset()    
    #L = gluon.loss.SoftmaxCrossEntropyLoss()    
    #L2 = gluon.loss.L2Loss(weight=1.0)
    #L2.initialize()
    num_test_iter = len(val_data)
    val_d_loss = 0
    val_epoch_loss = 0
    val_mse_loss = 0
    val_cheat_loss = 0
    val_pre_loss = 0    
    for i, batch in enumerate(val_data):
        val_output = []
        data, label = batch_fn(batch, ctx)
        for X, y in zip(data,label):            
            X1 = takeT(X)
            X2 = takeT(X,T=opt.predict_T)
            X1 = X1.reshape((-1,) + X1.shape[2:]) # for reconstruction, fed to g
            X2 = X2.reshape((-1,) + X2.shape[2:]) # for prediction, fed to d

            pred, latel = net(X1.astype(opt.dtype, copy=False)) 
            val_output.append(pred)
            x_hat = net1(latel[0].astype(opt.dtype, copy=False),
                         latel[1].astype(opt.dtype, copy=False),latel[2].astype(opt.dtype, copy=False))
            # AutoGrad train d 
            #with ag.record():
            x_hat_reshape = nd.transpose(data=x_hat ,axes=(0,2,1,3,4))
            x_hat_reshape = x_hat_reshape.reshape((-1,) + x_hat_reshape.shape[2:]) #.reshape                
            x2_reshape = nd.transpose(data=X2,axes=(0,2,1,3,4))
            x2_reshape = x2_reshape.reshape((-1,) + x2_reshape.shape[2:]) #.reshape
            
            d_pred_real = net2(x2_reshape.astype(opt.dtype, copy=False))
            d_pred_fake = net2(x_hat_reshape.astype(opt.dtype, copy=False))                
            loss_d = loss_fn(d_pred_real,nd.ones(shape=(batch_size),ctx=ctx[0])) + loss_fn(d_pred_fake,nd.zeros(shape=(batch_size),ctx=ctx[0]))
                       
            # train g
            loss_g_l2 = loss_l2(x_hat, X1.astype(opt.dtype, copy=False) ) + loss_l2(x_hat, X2.astype(opt.dtype, copy=False) )
            loss_g_cheat = loss_fn(d_pred_fake, nd.ones(shape=(batch_size),ctx=ctx[0]) )#net2(x_hat_reshape))
            loss_g_ft = loss_fn(pred, y.astype(opt.dtype, copy=False))
            loss_g = loss_g_l2 + loss_g_cheat + loss_g_ft 

            val_epoch_loss += loss_g.mean().asscalar() / len(label)
            val_d_loss += loss_d.mean().asscalar() / len(label)            
            val_mse_loss += loss_g_l2.mean().asscalar() / len(label)
            val_cheat_loss += loss_g_cheat.mean().asscalar() / len(label)
            val_pre_loss += loss_g_ft.mean().asscalar() / len(label)                
            
        acc_top1.update(label, val_output)
        acc_top5.update(label, val_output)        
    
    _, top1 = acc_top1.get()
    _, top5 = acc_top5.get()
    
    val_dloss = val_d_loss / num_test_iter
    val_loss = val_epoch_loss / num_test_iter
    loss_mse = val_mse_loss / num_test_iter
    loss_pre = val_pre_loss / num_test_iter
    loss_cheat = val_cheat_loss / num_test_iter
    
    return (top1, top5, val_loss, loss_mse,loss_pre,loss_cheat,val_dloss)
Example #34
            with ag.record():
                """ making x_hat : feeding X1 into net and net1 """
                _ , latel = net(X1.astype(opt.dtype, copy=False)) 
                #output.append(pred)                
                x_hat = net1(latel[0].astype(opt.dtype, copy=False),
                             latel[1].astype(opt.dtype, copy=False),latel[2].astype(opt.dtype, copy=False))
                # train d 

                x_hat_reshape = nd.transpose(data=x_hat ,axes=(0,2,1,3,4))
                x_hat_reshape = x_hat_reshape.reshape((-1,) + x_hat_reshape.shape[2:]) #.reshape                
                x2_reshape = nd.transpose(data=X2,axes=(0,2,1,3,4))
                x2_reshape = x2_reshape.reshape((-1,) + x2_reshape.shape[2:]) #.reshape
                """ train discriminator """
                d_pred_real = net2(x2_reshape.astype(opt.dtype, copy=False))        # feeding real X2
                d_pred_fake = net2(x_hat_reshape.astype(opt.dtype, copy=False))     # feeding fake x_hat               
                loss_d = loss_fn(d_pred_real,nd.ones(shape=(batch_size),ctx=ctx[0])) + loss_fn(d_pred_fake,nd.zeros(shape=(batch_size),ctx=ctx[0]))
                loss_d.backward()
                
            trainer_d.step(batch_size,ignore_stale_grad=True)
            # train g
            with ag.record():
                """ generation x_hat : feeding X1 into net  4 predicting X2"""
                pred, latel = net(X1.astype(opt.dtype, copy=False))
                output.append(pred)
                x_hat = net1(latel[0].astype(opt.dtype, copy=False),
                             latel[1].astype(opt.dtype, copy=False),latel[2].astype(opt.dtype, copy=False))
                """ reconstruction X1 and predicting X2 """
                loss_g_l2 = loss_l2(x_hat, X1.astype(opt.dtype, copy=False) ) + loss_l2(x_hat, X2.astype(opt.dtype, copy=False) )
                """ cheat discriminator """
                loss_g_cheat = loss_fn(d_pred_fake, nd.ones(shape=(batch_size),ctx=ctx[0]) )#net2(x_hat_reshape))
                """ finetuning btw """
Example #35
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob


X = nd.arange(16).reshape((2, 8))
print(dropout(X, 0))
print(dropout(X, 0.5))
print(dropout(X, 1))

# Define the model parameters
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

# Define the model
drop_prob1, drop_prob2 = 0.2, 0.5


def net(X):
    X = X.reshape((-1, num_inputs))
Example #36
def train_network(net, lr, input_shape, batch_size, train_path, test_path,
                  epoch, ctx):
    train_data, val_data = prepare_data(train_path, test_path, input_shape,
                                        batch_size)

    for X, y in train_data:
        print("X shape {}, y shape", X.shape, y.shape)
        break

    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

    net.summary(nd.zeros(shape=(1, 3) + input_shape, ctx=ctx))

    net.hybridize()

    lr_sched = mx.lr_scheduler.FactorScheduler(2000, factor=0.6, base_lr=1.0)
    optim = mx.optimizer.SGD(learning_rate=lr,
                             momentum=0.9,
                             wd=0.0001,
                             lr_scheduler=lr_sched)
    trainer = gluon.Trainer(net.collect_params(), optim)

    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

    train_acc_meter = mx.metric.Accuracy()
    train_loss_meter = mx.metric.CrossEntropy()

    hybridized = False

    with mxboard.SummaryWriter(logdir="./vgg_logs", flush_secs=60) as sw:
        for ep in range(1, epoch + 1):
            epoch_start = timeit.default_timer()

            train_acc_meter.reset()
            train_loss_meter.reset()

            print("Current Learning Rate: {}".format(trainer.learning_rate))
            for it, (data, label) in enumerate(train_data):
                data = data.as_in_context(ctx)
                label = label.as_in_context(ctx)

                with autograd.record():
                    output = net(data)
                    loss_val = loss_fn(output, label)
                loss_val.backward()
                trainer.step(data.shape[0])

                train_acc_meter.update(preds=[output], labels=[label])
                train_loss_meter.update(labels=[label],
                                        preds=[nd.softmax(output, axis=1)])

                if it % 10 == 0:
                    print(
                        "Epoch {}, batch {}, train loss {:.4f}, train acc {:.4f}"
                        .format(ep, it,
                                train_loss_meter.get()[1],
                                train_acc_meter.get()[1]))

            nd.waitall()
            epoch_stop = timeit.default_timer()

            val_loss, val_acc = evaluate(val_data, net, ctx)
            nd.waitall()
            print(
                "Epoch {}, Training time {}, learning rate {}, validation loss {:.5f}, validatoin acc {:.5f}"
                .format(ep, epoch_stop - epoch_start, trainer.learning_rate,
                        val_loss, val_acc))
            sw.add_scalar(tag="train_loss",
                          value=train_loss_meter.get()[1],
                          global_step=ep)
            sw.add_scalar(tag="train_acc",
                          value=train_acc_meter.get()[1],
                          global_step=ep)
            sw.add_scalar(tag="val_acc", value=val_acc, global_step=ep)
            sw.add_scalar(tag="val_loss", value=val_loss, global_step=ep)
            sw.add_scalar(tag="learning_rate",
                          value=trainer.learning_rate,
                          global_step=ep)
            if not hybridized:
                sw.add_graph(net)
                hybridized = True

            if ep % 2 == 0:
                net.export("vgg_models/vgg", ep)

    return net
Example #37
 def get_linear_dense_input(self, input_sample):
     y = nd.zeros(shape=(input_sample.shape[0], 1), ctx=self.ctx)
     for single_feat in self.feature_dict['dense']:
         x = input_sample[:, single_feat.feat_name].reshape((-1,1))
         y = nd.concat(y,x,dim=1)
     return y[:, 1:]
Example #38
    def render(self, bg, mode, pascal_rate=0.0, render_rate=1.0):
        '''
        Parameters
        ----------
        bg: mxnet.ndarray(4D)
          background array,
          dimension = bs * channel * h * w
        mode: str, {'train', 'valid'}
          use training dataset or not
        render_rate: float
          probability that an image contains a car
        pascal_rate: float
          probability of using the pascal_3D dataset

        Returns
        ----------
        img_batch: mxnet.ndarray(4D)
          same as bg input
        label_batch: mxnet.ndarray(3D)
          bs * object * [cls, y(0~1), x(0~1), h(0~1), w(0~1), r(+-pi), all labels prob]
        '''
        bs = len(bg)
        ctx = self.ctx

        mask = nd.zeros((bs, 3, self.h, self.w), ctx=ctx)
        img_batch = nd.zeros((bs, 3, self.h, self.w), ctx=ctx)
        label_batch = nd.ones((bs, 1, 6 + self.num_cls), ctx=ctx) * (-1)

        for i in range(bs):
            if np.random.rand() > render_rate:
                continue

            r1 = np.random.uniform(low=0.9, high=1.1)
            if np.random.rand() < pascal_rate:
                pil_img, r_box_l, r_box_t, r_box_r, r_box_b, r, \
                    img_cls, label_distribution = self._render_pascal(mode, r1)

            else:

                pil_img, r_box_l, r_box_t, r_box_r, r_box_b, r, \
                    img_cls, label_distribution = self._render_png(mode, r1)

            r_box_w = r_box_r - r_box_l  # r_box_xx means after rotate
            r_box_h = r_box_b - r_box_t  # r_box_xx means after rotate

            # -------------------- move -------------------- #
            paste_x = np.random.randint(low=int(-r_box_l - 0.3 * r_box_w),
                                        high=int(self.w - r_box_l -
                                                 0.7 * r_box_w))

            paste_y = np.random.randint(low=int(-r_box_t - 0.3 * r_box_h),
                                        high=int(self.h - r_box_t -
                                                 0.7 * r_box_h))

            box_y = (r_box_b + r_box_t) / 2. + paste_y
            box_x = (r_box_r + r_box_l) / 2. + paste_x
            box_h = float(r_box_b - r_box_t)
            box_w = float(r_box_r - r_box_l)

            # -------------------- -------------------- #
            tmp = PIL.Image.new('RGBA', (self.w, self.h))
            tmp.paste(pil_img, (paste_x, paste_y))

            fg = yolo_gluon.pil_rgb_2_rgb_ndarray(tmp, augs=self.augs)
            img_batch[i] = fg.as_in_context(ctx)

            m = yolo_gluon.pil_mask_2_rgb_ndarray(tmp.split()[-1])
            mask[i] = m.as_in_context(ctx)

            label = nd.array([[
                img_cls, box_y / self.h, box_x / self.w, box_h / self.h,
                box_w / self.w, r
            ]])

            label = nd.concat(label, label_distribution, dim=-1)
            label_batch[i] = label

        ####################################################################
        img_batch = ((bg / 255.) * (1 - mask) + img_batch * mask)
        img_batch = nd.clip(img_batch, 0, 1)
        # 0~1 (batch_size, channels, h, w)
        return img_batch, label_batch
                      rect.xy[1],
                      labels[i],
                      va='center',
                      ha='center',
                      fontsize=9,
                      color=text_color,
                      bbox=dict(facecolor=color, lw=0))


bbox_scale = nd.array((w, h, w, h))
fig = plt.imshow(img)
# show_boxes(fig.axes, boxes[250,250,:,:] * bbox_scale,
#            ['s=0.75,r=1','s=0.5,r=1','s=0.25,r=1','s=0.75,r=2','s=0.75,r=0.5'])
# plt.show()

ground_truth = nd.array([[0, 0.25, 0.1, 0.45, 0.42], [1, 0.55, 0.1, 0.75,
                                                      0.4]])
anchor = nd.array([[0, 0.1, 0.2, 0.3], [0.2, 0.1, 0.5, 0.9],
                   [0.6, 0.1, 0.8, 0.5], [0.55, 0.3, 0.7, 0.5],
                   [0.65, 0.15, 0.8, 0.9]])
show_boxes(fig.axes,
           ground_truth[:, 1:] * bbox_scale,
           labels=['dog', 'cat'],
           colors='k')
show_boxes(fig.axes, anchor * bbox_scale, ['0', '1', '2', '3', '4'])
# plt.show()

labels = contrib.nd.MultiBoxTarget(anchor.expand_dims(axis=0),
                                   ground_truth.expand_dims(axis=0),
                                   nd.zeros((1, 3, 5)))
Example #40
import d2lzh as d2l
from mxnet import autograd, nd

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_input = 784
num_output = 10

W = nd.random.normal(scale=0.01, shape=(num_input, num_output))
b = nd.zeros(num_output)

W.attach_grad()
b.attach_grad()


# The softmax function: exponentiate each element, then normalize each row
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition


X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)


def net(X):
    return softmax(nd.dot(X.reshape((-1, num_input)), W) + b)
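
Note that this softmax exponentiates the raw scores directly; a numerically safer variant (a common alternative, not part of the original example) subtracts the per-row maximum before exponentiating:

def softmax_stable(X):
    X_shifted = X - X.max(axis=1, keepdims=True)  # shifting does not change the result but avoids overflow in exp
    X_exp = X_shifted.exp()
    return X_exp / X_exp.sum(axis=1, keepdims=True)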
Example #41
def train_and_predict_rnn(rnn,
                          is_random_iter,
                          num_epochs,
                          num_steps,
                          num_hiddens,
                          lr,
                          clipping_theta,
                          batch_size,
                          vocab_size,
                          pred_period,
                          pred_len,
                          prefixes,
                          get_params,
                          get_inputs,
                          ctx,
                          corpus_indices,
                          idx_to_char,
                          char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(X, vocab_size),
                                                    state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(X, vocab_size), state_h,
                                           *params)
                y = Y.T.reshape((-1, ))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print('\nepoch %d, perplexity %f' %
                  (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(
                    ' - ',
                    predict_rnn(rnn, prefix, pred_len, params, num_hiddens,
                                vocab_size, ctx, idx_to_char, char_to_idx,
                                get_inputs, is_lstm))
Example #42
 def _three():
     return (_one(
         (num_inputs, num_hiddens)), _one(
             (num_hiddens, num_hiddens)), nd.zeros(num_hiddens, ctx=ctx))
Example #43
    _outputs = []

    for X in _inputs:
        # compute INPUT gate from input and last/initial hidden state
        input_gate = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        # compute FORGET gate from input and last/initial hidden state
        forget_gate = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        # compute OUTPUT gate from input and last/initial hidden state
        output_gate = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        # compute memory cell candidate from input and last/initial hidden state
        memory_cell_candidate = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        # compute memory cell from last memory cell and memory cell candidate
        C = forget_gate * C + input_gate * memory_cell_candidate
        # compute hidden state from output gate and memory cell
        H = output_gate * nd.tanh(C)
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)

    return _outputs, H, C


if __name__ == '__main__':
    initial_state_h = nd.zeros(shape=(config.batch_size, config.hidden_dim))
    initial_state_c = nd.zeros(shape=(config.batch_size, config.hidden_dim))
    dump_data = [nd.random_normal(shape=(config.batch_size, config.input_dim)) for _ in range(config.num_steps)]

    parameters = get_parameters()
    _outputs, final_state, memory_cell = lstm(dump_data, initial_state_h, initial_state_c, *parameters)

    print(_outputs, final_state, memory_cell)
Example #44
def transform_preds(coords, center, scale, output_size):
    target_coords = nd.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2].asnumpy(), trans)
    return target_coords
Example #45
########################
#  run the model and generate sample text
########################
epochs = 2000
moving_loss = 0.

learning_rate = 2.0

# state = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
for e in range(epochs):
    ############################
    # Attenuate the learning rate by a factor of 2 every 100 epochs.
    ############################
    if ((e+1) % 100 == 0):
        learning_rate = learning_rate / 2.0
    h = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
    c = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
    for i in range(num_batches):
        data_one_hot = train_data[i]
        label_one_hot = train_label[i]
        with autograd.record():
            outputs, h, c = gru_rnn(data_one_hot, h, c)
            loss = average_ce_loss(outputs, label_one_hot)
            loss.backward()
        SGD(params, learning_rate)

        ##########################
        #  Keep a moving average of the losses
        ##########################
        if (i == 0) and (e == 0):
            moving_loss = nd.mean(loss).asscalar()
Example #46
 def check_ndarray_zeros():
     a = nd.zeros(shape=LARGE_X)
     assert a[-1] == 0
     assert a.shape == (LARGE_X, )
     assert a.size == LARGE_X
Example #47
def one_hots(numerical_list, vocab_size=vocab_size):
    result = nd.zeros((len(numerical_list), vocab_size), ctx=ctx)
    for i, idx in enumerate(numerical_list):
        result[i, idx] = 1.0
    return result
Example #48
 def label_transform(label, classes):
     ind = label.astype('int')
     res = nd.zeros((ind.shape[0], classes), ctx = label.context)
     res[nd.arange(ind.shape[0], ctx = label.context), ind] = 1
     return res
Example #49
def init_rnn_state(batch_size, num_hiddens, ctx):
    return (
        nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx),  # initialize the hidden state
        nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx))  # initialize the memory cell
Example #50
0
def check_ones_like():
    a = nd.zeros(LARGE_X)
    b = nd.ones_like(a)
    assert b[-1] == 1
    assert b.shape == a.shape
Example #51
0
                exnd = nds_scale[__pw][:, :exin]

                nds_target[__pw] = nd.concat(*[exnd, nds_scale[__pw]], dim=1)

            else:

                exout = int(cout * expand)
                exin = int(cin * expand)

                exnd_0 = nds_scale[__pw][:exout]

                tmp = nd.concat(*[exnd_0, nds_scale[__pw]], dim=0)

                exnd_1 = tmp[:, :exin]

                nds_target[__pw] = nd.concat(*[exnd_1, tmp], dim=1)

        else:

            nds_target[__pw] = nds_scale[__w]

        #print('---------------------------')

        #print('{} : {}'.format(__w, nds_scale[__w].shape))

        #print('{} : {}'.format(__pw, nds_target[__pw].shape))

nds_target['arg:fc7_bias'] = nd.zeros([1000, 1])

nd.save('mobilenet_v2_{}-0000.params'.format(scale), nds_target)
Example #52
0
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim, 
                          learning_rate, clipping_norm, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1): 
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, 
                                     ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params) 
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1,))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" % (e, 
                                               exp(train_loss/num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params,
                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs,
                      is_lstm))
            print()
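The label transpose-and-reshape above is easier to see on a tiny example; with batch_size=2 and num_steps=3 (values chosen only for illustration):

label = nd.array([[1, 2, 3],    # time steps for batch element 0
                  [4, 5, 6]])   # time steps for batch element 1
print(label.T.reshape((-1,)))   # [1. 4. 2. 5. 3. 6.] -- time-major order, matching
                                # nd.concat(*outputs, dim=0) over the per-step outputs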
Example #53
0
def init_momentum_states():
    v_w = nd.zeros((features.shape[1], 1))
    v_b = nd.zeros(1)
    return (v_w, v_b)
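These states are then consumed by a momentum update; a minimal sketch of such an update step, assuming `hyperparams` holds 'lr' and 'momentum' (names not taken from the source):

def sgd_momentum(params, states, hyperparams):
    # v <- momentum * v + lr * grad ; p <- p - v
    for p, v in zip(params, states):
        v[:] = hyperparams['momentum'] * v + hyperparams['lr'] * p.grad
        p[:] -= v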
Example #54
0
        x = self.models(x)
        x = self.margin_inner_product(x, label)
        return self.softmax_loss(x, label)

    def _main_net(self, layer_type, feature_dim, label_num):
        model = nn.Sequential()
        if layer_type == "20layer":
            model.add(
                CNNResidualBlock(64, 64, 1),
                CNNResidualBlock(128, 128, 2),
                CNNResidualBlock(256, 256, 4),
                CNNResidualBlock(512, 512, 1)
            )
        else:
            raise Exception("Unsupported layer type.")
        model.add(nn.Dense(feature_dim))
        return model


if __name__ == "__main__":
    margin_params = {"feature_dim": 512, "label_num": 6, "lamb_iter": 0, "lamb_base": 1000,
                     "lamb_gamma": 0.12, "lamb_power": 1, "lamb_min": 10}
    margin_params["layer_type"] = "20layer"
    test = SphereFaceNet(512, 6, margin_params)
    print(test)
    test.initialize(ctx=mx.gpu())
    x = nd.random.uniform(shape=(2, 3, 112, 112), ctx=mx.gpu())
    label = nd.zeros([2], ctx=mx.gpu())
    print(test(x, label))

Example #55
0
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

# initialization
g_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
d_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
g_trainer = gluon.Trainer(
    g_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
d_trainer = gluon.Trainer(
    d_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
g_net.collect_params().zero_grad()
d_net.collect_params().zero_grad()
# define evaluation metric
metric = mx.metric.CustomMetric(facc)
# initialize labels
real_label = nd.ones(BATCH_SIZE, CTX)
fake_label = nd.zeros(BATCH_SIZE, CTX)

for epoch in range(NUM_EPOCHS):
    for i, (d, _) in enumerate(train_data):
        # update D
        data = d.as_in_context(CTX)
        noise = nd.normal(loc=0, scale=1, shape=(
            BATCH_SIZE, Z_DIM, 1, 1), ctx=CTX)
        with autograd.record():
            # train with real image
            output = d_net(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label, ], [output, ])

            # train with fake image
            fake_image = g_net(noise)
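The example is cut off mid-update; the usual DCGAN continuation (a sketch of the common pattern, not necessarily the author's exact code) finishes the discriminator loss and then updates the generator:

            output = d_net(fake_image.detach()).reshape((-1, 1))
            errD_fake = loss(output, fake_label)
            errD = errD_real + errD_fake
            errD.backward()
            metric.update([fake_label, ], [output, ])
        d_trainer.step(BATCH_SIZE)

        # update G: push the discriminator to label generated images as real
        with autograd.record():
            fake_image = g_net(noise)
            output = d_net(fake_image).reshape((-1, 1))
            errG = loss(output, real_label)
            errG.backward()
        g_trainer.step(BATCH_SIZE)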
Example #56
0
    def forward(self, input_data):
        ep1 = input_data[:, 0].asnumpy().astype(int).tolist()
        ep2 = input_data[:, 1].asnumpy().astype(int).tolist()
        x_sen = input_data[:, 2:DIMENSION * FIXED_WORD_LENGTH + 2].reshape(
            (input_data.shape[0], FIXED_WORD_LENGTH, DIMENSION))
        e1_start = DIMENSION * FIXED_WORD_LENGTH + 2
        e1_infobox = input_data[:, e1_start:e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
            (input_data.shape[0], INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH,
             DIMENSION))  # (batch_size,INFOBOX_LENGTH,INFOBOX_VALUE_LENGTH,100)
        e2_start = e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION
        e2_infobox = input_data[:, e2_start:e2_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
            (input_data.shape[0], INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH,
             DIMENSION))  # (batch_size,INFOBOX_LENGTH,INFOBOX_VALUE_LENGTH,100)
        h_sen = self.lstm(x_sen).expand_dims(axis=1)  # (batch_size,60,128)
        be1_mask = nd.zeros(h_sen.shape, ctx=CTX)
        aes_mask = nd.zeros(h_sen.shape, ctx=CTX)
        be2_mask = nd.zeros(h_sen.shape, ctx=CTX)
        be1_pad = nd.ones(h_sen.shape, ctx=CTX) * (-100)
        aes_pad = nd.ones(h_sen.shape, ctx=CTX) * (-100)
        be2_pad = nd.ones(h_sen.shape, ctx=CTX) * (-100)
        for i in range(x_sen.shape[0]):
            if ep1[i] == 0:
                ep1[i] += 1
                ep2[i] += 1
            be1_mask[i, :, :ep1[i], :] = 1
            be1_pad[i, :, :ep1[i], :] = 0
            aes_mask[i, :, ep1[i]:ep2[i], :] = 1
            aes_pad[i, :, ep1[i]:ep2[i], :] = 0
            be2_mask[i, :, ep2[i]:, :] = 1
            be2_pad[i, :, ep2[i]:, :] = 0
        be1 = h_sen * be1_mask
        aes = h_sen * aes_mask
        be2 = h_sen * be2_mask
        be1 = be1 + be1_pad
        aes = aes + aes_pad
        be2 = be2 + be2_pad
        o1 = self.pool(be1)  # (128, 1, 3, 128)
        o2 = self.pool(aes)
        o3 = self.pool(be2)
        y_sen = nd.concat(o1, o2, o3, dim=2)  # (128, 384)
        y_out = self.sen_out(y_sen)
        e1_infobox_list_all = nd.ones((e1_infobox.shape[0], e1_infobox.shape[1], 51, 1),
                                      ctx=CTX)  # (batch_size,INFOBOX_LENGTH,51,1)
        e2_infobox_list_all = nd.ones((e1_infobox.shape[0], e2_infobox.shape[1], 51, 1),
                                      ctx=CTX)  # (batch_size,INFOBOX_LENGTH,51,1)

        for i in range(e1_infobox.shape[0]):
            e1 = self.conv1(x_sen[i].expand_dims(axis=0).expand_dims(axis=1), e1_infobox[i].expand_dims(axis=1))
            #             e1_p = self.pool(e1)
            e1_infobox_list_all[i] = e1.reshape((e1.shape[1], e1.shape[2], e1.shape[3]))
            e2 = self.conv2(x_sen[i].expand_dims(axis=0).expand_dims(axis=1), e2_infobox[i].expand_dims(axis=1))
            #             e2_p = self.pool(e2)
            e2_infobox_list_all[i] = e2.reshape((e2.shape[1], e2.shape[2], e2.shape[3]))

        e1_infobox_list_all = e1_infobox_list_all.reshape(
            (e1_infobox.shape[0], e1_infobox.shape[1], -1))  # (batch_size,INFOBOX_LENGTH,51)
        e2_infobox_list_all = e2_infobox_list_all.reshape(
            (e2_infobox.shape[0], e2_infobox.shape[1], -1))  # (batch_size,INFOBOX_LENGTH,51)

        e1_infobox_list_all_new = self.dense1(e1_infobox_list_all)
        e2_infobox_list_all_new = self.dense2(e2_infobox_list_all)

        #         g1 = nd.softmax(self.att(e1_infobox_list_all),axis=2) # (batch_size,INFOBOX_LENGTH,1)
        #         g2 = nd.softmax(self.att(e2_infobox_list_all),axis=2) # (batch_size,INFOBOX_LENGTH,1)
        #         g1_att = nd.batch_dot(nd.transpose(g1,axes = (0,2,1)),e1_infobox_list_all) # (batch_size,1,64)
        #         g2_att = nd.batch_dot(nd.transpose(g2,axes = (0,2,1)),e2_infobox_list_all) # (batch_size,1,64)
        #         g1_att = g1_att.reshape((g1_att.shape[0],-1)) # (batch_size,64)
        #         g2_att = g2_att.reshape((g2_att.shape[0],-1)) # (batch_size,64)

        # (batch_size,128)
        e_infobox_list_all_att = nd.concat(e1_infobox_list_all_new, e2_infobox_list_all_new, dim=1)
        y_infobox = self.infobox_out(e_infobox_list_all_att)
        # h_sen_new = self.lstm_out(h_sen.expand_dims(1))
        # h_sen_new = h_sen_new.reshape((h_sen_new.shape[0], -1))  # (batch_size,128)
        # (batch_size,256)
        h_sen_infobox = nd.concat(y_out, y_infobox, dim=1)  # (128, 384) (128, 768)
        y = self.output(h_sen_infobox)
        return y
Example #57
0
import mxnet as mx
import os
from mxnet import autograd, nd
from mxboard import SummaryWriter
import random

train_data = nd.random.uniform(-1, 1, shape=(1000, 2))
true_w = nd.array([[5.3, 6.5]])
true_b = nd.array([[8.6]])
train_label = nd.dot(train_data, nd.transpose(true_w)) + true_b

# print(train_label)

weight = nd.random.normal(scale=1.0, shape=(1, 2))
bias = nd.zeros(shape=(1, 1))

# print(weight)
# print(bias)


def data_iter(datas, labels, batchsize):
    data_len = len(datas)
    indices = list(range(data_len))
    random.shuffle(indices)
    for i in range(0, data_len, batchsize):
        j = nd.array(indices[i:min(i + batchsize, data_len)])
        yield (datas.take(j), labels.take(j))


def mlp(x, w, b):
    return nd.dot(x, nd.transpose(w)) + b
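A minimal training loop this setup appears to be building toward (the squared loss, learning rate, batch size, and epoch count are assumptions):

def squared_loss(y_hat, y):
    return (y_hat - y) ** 2 / 2

weight.attach_grad()
bias.attach_grad()
lr, num_epochs, batch_size = 0.03, 5, 10
for epoch in range(num_epochs):
    for x, y in data_iter(train_data, train_label, batch_size):
        with autograd.record():
            l = squared_loss(mlp(x, weight, bias), y)
        l.backward()
        weight[:] -= lr / batch_size * weight.grad
        bias[:] -= lr / batch_size * bias.grad
    print('epoch %d, loss %f' % (
        epoch + 1, squared_loss(mlp(train_data, weight, bias), train_label).mean().asscalar()))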
Example #58
0
def train_and_predict_rnn(rnn,
                          is_random_iter,
                          epochs,
                          num_steps,
                          hidden_dim,
                          learning_rate,
                          clipping_norm,
                          batch_size,
                          pred_period,
                          pred_len,
                          seqs,
                          get_params,
                          get_inputs,
                          ctx,
                          corpus_indices,
                          idx_to_char,
                          char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                     ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1, ))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" %
                  (e, exp(train_loss / num_examples)))
            for seq in seqs:
                print(
                    ' - ',
                    predict_rnn(rnn, seq, pred_len, params, hidden_dim, ctx,
                                idx_to_char, char_to_idx, get_inputs, is_lstm))
            print()
Example #59
0
def init_rnn_state(batch_size, num_hiddens, ctx):
    return (nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx),
            )  # return a tuple (written with parentheses) to signal the state array will not be modified
Example #60
0
def label_transform(label, classes):
    ind = label.astype('int')
    res = nd.zeros((ind.shape[0], classes), ctx = label.context)
    res[nd.arange(ind.shape[0], ctx = label.context), ind] = 1
    return res