Python try_gpu示例，gluonbook.try_gpu Python示例

示例#1

0

显示文件

def main():
    """Create a VGG network and train it on the 'noface' data lists.

    The network is built by ``vgg`` from a fixed block specification,
    initialised on the context returned by ``gb.try_gpu()``, hybridized and
    then handed to ``train`` together with the hyper-parameters below.
    """
    device = gb.try_gpu()

    # Block specification passed straight to vgg(); presumably
    # (number of conv layers, output channels) per block — TODO confirm.
    model = vgg(((1, 64), (1, 128), (2, 256), (2, 512)))
    model.initialize(ctx=device)
    model.hybridize()

    # Optimisation hyper-parameters.
    learning_rate, weight_decay = 0.0001, 1e-4
    decay_period, decay_factor = 10, 0.1
    epochs, batch = 100, 128

    # The trailing 112 is forwarded unchanged; presumably the input image
    # size expected by train() — TODO confirm against its signature.
    train(model, 'noface_dataiter_train.txt', 'noface_dataiter_val.txt',
          learning_rate, weight_decay, device, epochs, decay_period,
          decay_factor, batch, 112)

示例#2

0

显示文件

    def __init__(self, num_category, **kwargs):
        """Create the layers owned by this network.

        Args:
            num_category: Number of units of the final dense layer ``fc2``.
            **kwargs: Forwarded unchanged to the parent class constructor.
        """
        super(Net1, self).__init__(**kwargs)
        with self.name_scope():
            # layers created in name_scope will inherit name space
            # from parent layer.
            self.bn = nn.BatchNorm()
            self.dropout = nn.Dropout(0.3)
            self.fc1 = nn.Dense(4096, activation="relu")
            self.fc2 = nn.Dense(num_category)
            # 5-layer LSTM with 1024 hidden units; presumably consumes image
            # features (inferred from the attribute name only).
            self.image_lstm = gluon.rnn.LSTM(hidden_size=1024, num_layers=5)
            # The string below is disabled code (an LSTMCell-based variant);
            # it is a bare string expression and has no runtime effect.
            '''
            self.lstm_cell = [gluon.rnn.LSTMCell(hidden_size=100) for i in range(5)]
            self.h = [nd.random.uniform(shape=(15, 100)) for i in range(5)]
            self.c = [nd.random.uniform(shape=(15, 100)) for i in range(5)]
            '''

            # 12-layer LSTM with 100 hidden units; presumably consumes the
            # question sequence (inferred from the attribute name only).
            self.question_lstm = gluon.rnn.LSTM(hidden_size=100, num_layers=12)
            # Two 1024-unit ReLU dense layers, one per branch.
            self.image_fc = nn.Dense(1024, activation="relu")
            self.question_fc = nn.Dense(1024, activation="relu")
            # Context returned by gb.try_gpu(), stored for later use.
            self.ctx = gb.try_gpu()

示例#3

0

显示文件

文件： ssd.py 项目： yuanjunyi/object-detection

    '''
    train_data.data_shape is (3, 256, 256)
    test_data.data_shape is (1, 5)

    1 is the number of bbox in each image in pikachu dataset.
    We need to make sure each image has the same number of bbox.
    For those have fewer bboxes, fill illegal bbox to reach this number
    so that images can be processed in batch.

    Each bbox has 5 elements [class, x, y, x, y]. class -1 represents
    illegal.

    Each image is required to have at least 3 bbox in GPU implementation.
    '''
    train_data.reshape(label_shape=(3, 5))
    ctx = gb.try_gpu()
    net = TinySSD(num_classes=2)
    net.initialize(init=init.Xavier(), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': 0.1,
        'wd': 5e-4
    })

    for epoch in range(5):
        acc, mae = 0, 0
        train_data.reset()  # Resets the iterator to the beginning of the data.
        tic = time.time()
        for i, batch in enumerate(train_data):
            # batch.data is a list of length 1
            X = batch.data[0].as_in_context(ctx)
            Y = batch.label[0].as_in_context(ctx)

示例#4

0

显示文件

                  strides,
                  padding,
                  activation='relu'),
        nn.Conv2D(num_channels, kernel_size=1, activation='relu'),
        nn.Conv2D(num_channels, kernel_size=1, activation='relu'))
    return blk


# NiN model, assembled stage by stage in the same layer order as before.
net = nn.Sequential()
# Three NiN blocks, each followed by a 3x3 / stride-2 max pooling layer.
net.add(nin_block(96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2D(pool_size=3, strides=2))
net.add(nin_block(256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2D(pool_size=3, strides=2))
net.add(nin_block(384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2D(pool_size=3, strides=2))
net.add(nn.Dropout(0.5))
# Final NiN block with 10 channels, then global average pooling and flatten.
net.add(nin_block(10, kernel_size=3, strides=1, padding=1),
        nn.GlobalAvgPool2D(),
        nn.Flatten())

# X = nd.random.uniform(shape=(1, 1, 224, 224))
# net.initialize()
# for layer in net:
#     X = layer(X)
#     print(layer.name, 'output shape:\t', X.shape)

# Training configuration.
lr = 0.1
num_epochs = 5
batch_size = 128
ctx = gb.try_gpu()

# Re-initialise the parameters on the chosen context and train with SGD.
net.initialize(init=init.Xavier(), force_reinit=True, ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
train_iter, test_iter = gb.load_data_fashion_mnist(
    batch_size=batch_size,
    resize=224,
)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

示例#5

0

显示文件

                       (epoch + 1, train_l / len(train_data), test_loss))
        else:
            epoch_s = ("epoch %d, train loss %f, " %
                       (epoch + 1, train_l / len(train_data)))
        prev_time = cur_time
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))


def get_net(ctx):
    """Wrap the result of ``get_shufflenet()`` and initialise it on ``ctx``.

    Args:
        ctx: Context handed to both ``initialize`` and ``reset_ctx``.

    Returns:
        The ``nn.Sequential`` instance after Xavier initialisation.
    """
    # NOTE(review): the result of get_shufflenet() is passed as the first
    # positional argument of nn.Sequential rather than through .add() —
    # confirm this is what the installed Gluon version expects.
    model = nn.Sequential(get_shufflenet())
    model.initialize(init.Xavier(), ctx=ctx)
    parameters = model.collect_params()
    parameters.reset_ctx(ctx)
    return model


def unzip_dataset(path, zip_file):
    """Extract the archive ``zip_file`` located in ``path`` into ``path``.

    Args:
        path: Directory that contains the archive and receives its contents.
        zip_file: File name of the zip archive, relative to ``path``.
    """
    archive_path = os.path.join(path, zip_file)
    archive = zipfile.ZipFile(archive_path, 'r')
    try:
        archive.extractall(path)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()


if __name__ == "__main__":
    # Location of the data set on disk.
    data_dir = "E:/srcs/anti-spoofing/data"
    input_dir = 'nir_flow_1w'
    # If the data is still zipped, extract it first with
    # unzip_dataset(data_dir, input_dir + '.zip').

    train_data, test_data = train_test_data(data_dir, input_dir)

    # Training configuration.
    ctx = gb.try_gpu()
    num_epochs = 1
    lr = 0.01
    wd = 1e-4
    lr_period = 10
    lr_decay = 0.1

    net = get_net(ctx)
    train(net, train_data, test_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay)

示例#6

0

显示文件

文件： TD3_LunarLander_v2.py 项目： subeans/Deep-rl-mxnet

def main():
    """Train or evaluate a TD3 agent on LunarLanderContinuous-v2.

    The mode ('train' or 'test') is read from standard input. In both modes
    the per-episode rewards are collected and plotted at the end; in train
    mode the agent is saved and the plot is additionally written to disk.

    Raises:
        NameError: If the entered mode is neither 'train' nor 'test'.
    """
    # Seed every random source with the same value.
    seed = 3453534
    env = gym.make('LunarLanderContinuous-v2').unwrapped
    env.seed(seed)
    mx.random.seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    ctx = gb.try_gpu()
    # ctx = mx.cpu()
    max_episodes = 300
    max_episode_steps = 2000
    env_action_bound = [[-1, 1], [-1, 1]]

    agent = TD3(
        action_dim=int(env.action_space.shape[0]),
        action_bound=env_action_bound,
        actor_learning_rate=0.001,
        critic_learning_rate=0.001,
        batch_size=64,
        memory_size=100000,
        gamma=0.99,
        tau=0.005,
        explore_steps=1000,
        policy_update=2,
        policy_noise=0.2,
        explore_noise=0.1,
        noise_clip=0.5,
        ctx=ctx,
    )

    reward_history = []
    mode = input("train or test: ")
    if mode not in ('train', 'test'):
        raise NameError('Wrong input')

    if mode == 'train':
        render = False
        for episode in range(max_episodes):
            episode_reward = 0
            state = env.reset()
            for _ in range(max_episode_steps):
                if render:
                    env.render()
                # Sample random actions until explore_steps steps were taken,
                # afterwards ask the agent.
                if agent.total_steps < agent.explore_steps:
                    action = env.action_space.sample()
                else:
                    action = agent.choose_action_train(state).asnumpy()
                agent.total_steps += 1
                next_state, reward, done, _info = env.step(action)
                agent.memory_buffer.store_transition(state, action, reward,
                                                     next_state, done)
                episode_reward += reward
                state = next_state
                if agent.total_steps >= agent.explore_steps:
                    agent.update()
                if done:
                    break
            print('episode  %d  reward  %f  total steps:  %d' %
                  (episode, episode_reward, agent.total_steps))
            reward_history.append(episode_reward)
        agent.save()
    else:
        # mode == 'test'
        render = True
        agent.load()
        for episode in range(max_episodes):
            episode_reward = 0
            state = env.reset()
            for _ in range(max_episode_steps):
                if render:
                    env.render()
                action = agent.choose_action_train(state).asnumpy()
                next_state, reward, done, _info = env.step(action)
                agent.memory_buffer.store_transition(state, action, reward,
                                                     next_state, done)
                episode_reward += reward
                state = next_state
                if done:
                    break
            print('episode  %d  reward  %f  total steps:  %d' %
                  (episode, episode_reward, agent.total_steps))
            reward_history.append(episode_reward)

    env.close()

    # Plot the reward curve; only a training run stores it on disk.
    plt.plot(reward_history)
    plt.xlabel('episode')
    plt.ylabel('reward')
    plt.title('TD3 LunarLanderContinuous-v2')
    if mode == 'train':
        plt.savefig('./LunarLanderContinuous_v2')
    plt.show()

示例#7

0

显示文件

文件： ssd.py 项目： bdus/programpractice

# Shape check: run a zero batch through a freshly initialised TinySSD
# and report the shape of each of its three outputs.
net = TinySSD(num_classes=1)
net.initialize()
X = nd.zeros((32, 3, 256, 256))

anchors, cls_preds, bbox_preds = net(X)

for _title, _out in (('output anchors:', anchors),
                     ('output class preds:', cls_preds),
                     ('output bbox preds:', bbox_preds)):
    print(_title, _out.shape)

from pikachu.input_data import load_data_pikachu

# Data: pikachu iterators; the training labels are reshaped to (3, 5).
batch_size = 32
train_data, test_data = load_data_pikachu(batch_size=batch_size)
train_data.reshape(label_shape=(3, 5))

# Model and optimiser.
ctx = gb.try_gpu()
net = TinySSD(num_classes=1)
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.2, 'wd': 5e-4},
)

# Loss functions used by calc_loss.
cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()


def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Return the sum of the class loss and the masked bounding-box loss.

    Args:
        cls_preds: Class predictions passed to ``cls_loss``.
        cls_labels: Class labels passed to ``cls_loss``.
        bbox_preds: Bounding-box predictions.
        bbox_labels: Bounding-box labels.
        bbox_masks: Multiplied element-wise into both the bounding-box
            predictions and labels before ``bbox_loss`` is applied.

    Returns:
        ``cls_loss(...) + bbox_loss(...)``.
    """
    classification_term = cls_loss(cls_preds, cls_labels)
    masked_preds = bbox_preds * bbox_masks
    masked_labels = bbox_labels * bbox_masks
    localisation_term = bbox_loss(masked_preds, masked_labels)
    return classification_term + localisation_term

示例#8

0

显示文件

文件： GRU.py 项目： JasonPin/Lyric

# Normalise the corpus: line breaks become spaces, then Latin letters,
# digits and the listed punctuation marks are removed.
all_chars = all_chars.replace('\n', ' ').replace('\r', ' ')
# Raw string so the backslashes reach the regex engine unchanged instead of
# relying on invalid string escapes such as '\.' or '\*'. The pattern
# matches the same set of characters as before.
all_chars = re.sub(
    r'[A-Za-z0-9\.\*\+\?\]\[＞＜<】〇〗〖\\【>!?>><<~/\u3000》,☆。！《》、`,～？…]', '',
    all_chars)
# Keep only the first 20000 characters.
all_chars = all_chars[:20000]
only_char_list = list(set(all_chars))
only_char_dict = {char: i for i, char in enumerate(only_char_list)}

# Despite its name this holds the vocabulary index of every character of
# the corpus, in corpus order (it is not a frequency table).
chars_frequency = [only_char_dict[char] for char in all_chars]
# print(chars_frequency)

# The vocabulary size is the number of distinct characters. It used to be
# len(chars_frequency), i.e. the corpus length, which made every layer sized
# by it far larger than the range of valid character indices.
vocab_size = len(only_char_dict)

# initialize the model

ctx = gb.try_gpu()  # use gpu to accelerate speed

num_inputs = vocab_size
num_hiddens = 256  # 256 nodes
num_outputs = vocab_size


def get_params():
    '''
    :return: return parameters
    '''
    # Update Gate Zt=σ(Xt*Wxz+Ht−1*Whz+bz).
    W_xz = nd.random.normal(scale=0.01,
                            shape=(num_inputs, num_hiddens),
                            ctx=ctx)
    W_hz = nd.random.normal(scale=0.01,

示例#9

0

显示文件

        # 如果X不在CPU上，将moving_mean和moving_var复制到X所在的设备上
        if self.moving_mean.context != X.context:
            self.moving_mean = self.moving_mean.copyto(X.context)
            self.moving_var = self.moving_var.copyto(X.context)
        # 保存更新过的moving_mean和moving_var
        Y, self.moving_mean, self.moving_var = batch_norm(X,
                                                          self.gamma.data(),
                                                          self.beta.data(),
                                                          self.moving_mean,
                                                          self.moving_var,
                                                          eps=1e-5,
                                                          momentum=0.9)
        return Y


# LeNet with batch normalisation, assembled stage by stage in the same
# layer order as before.
net = nn.Sequential()
# Convolution stage 1: conv -> batch norm -> sigmoid -> max pool.
net.add(nn.Conv2D(6, kernel_size=5),
        BatchNorm(6, num_dims=4),
        nn.Activation('sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))
# Convolution stage 2.
net.add(nn.Conv2D(16, kernel_size=5),
        BatchNorm(16, num_dims=4),
        nn.Activation('sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))
# Dense stages: dense -> batch norm -> sigmoid.
net.add(nn.Dense(120),
        BatchNorm(120, num_dims=2),
        nn.Activation('sigmoid'))
net.add(nn.Dense(84),
        BatchNorm(84, num_dims=2),
        nn.Activation('sigmoid'))
# Output layer.
net.add(nn.Dense(10))

# Training configuration.
lr = 1.0
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()

# Initialise on the chosen context and train with plain SGD.
net.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

示例#10

0

显示文件

    root = os.path.expanduser(root)
    transformer = []
    if resize:
        transformer += [gdata.vision.transforms.Resize(resize)]

    transformer += [gdata.vision.transforms.ToTensor()]
    transformer = gdata.vision.transforms.Compose(transformer)
    mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
    mnist_test = gdata.vision.FashionMNIST(root=root, train=False)
    num_workers = 0 if sys.platform.startswith(
        'win32') else 4  # 作者说windows目前还不支持多线程读数据
    train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                                  batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                                 batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)

    return train_iter, test_iter


# Data: Fashion-MNIST iterators with images resized to 224.
batch_size = 128
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

# Training configuration.
lr = 0.01
num_epochs = 5
ctx = gb.try_gpu()

# Re-initialise the parameters on the chosen context and train with SGD.
net.initialize(init=init.Xavier(), force_reinit=True, ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

示例#11

0

显示文件

文件： lstm.py 项目： momoComeOn/deeptool

def lstm():
    """Run a character-level LSTM language-model demo on ``./song.txt``.

    The function is a linear script. It reads the corpus, builds the
    character <-> index vocabulary, demonstrates the two mini-batch
    samplers, defines from-scratch RNN / LSTM cells together with
    prediction, gradient-clipping and training helpers, and finally trains
    the LSTM through ``gb.train_and_predict_rnn``.

    All diagnostic output goes through the ``print()`` function; the former
    Python 2 ``print x`` statements were a SyntaxError on Python 3.

    Returns:
        None. Everything is reported on standard output.
    """
    # NOTE(review): no encoding is given, so the platform default is used —
    # confirm the encoding of song.txt.
    with open('./song.txt') as f:
        corpus_chars = f.read()
    print(corpus_chars[0:49])

    # Corpus length before and after replacing line breaks with spaces.
    print(len(corpus_chars))
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    print(len(corpus_chars))

    # Vocabulary: every distinct character gets an integer index.
    idx_to_char = list(set(corpus_chars))
    char_to_idx = {char: i for i, char in enumerate(idx_to_char)}

    vocab_size = len(char_to_idx)

    print('vocab size:', vocab_size)

    corpus_indices = [char_to_idx[char] for char in corpus_chars]

    sample = corpus_indices[:40]

    print('chars: \n', ''.join([idx_to_char[idx] for idx in sample]))
    print('\nindices: \n', sample)

    import random
    from mxnet import nd

    def data_iter_random(corpus_indices, batch_size, num_steps, ctx=None):
        """Yield ``(data, label)`` batches taken from shuffled positions.

        Every example is a window of ``num_steps`` consecutive indices and
        its label is the same window shifted right by one position.
        """
        # Subtract one because each label index is the corresponding data
        # index plus one.
        num_examples = (len(corpus_indices) - 1) // num_steps
        epoch_size = num_examples // batch_size
        # Shuffle the examples.
        example_indices = list(range(num_examples))
        random.shuffle(example_indices)

        # Return num_steps items starting at pos.
        def _data(pos):
            return corpus_indices[pos:pos + num_steps]

        for i in range(epoch_size):
            # Read batch_size random examples each time.
            i = i * batch_size
            batch_indices = example_indices[i:i + batch_size]
            data = nd.array([_data(j * num_steps) for j in batch_indices],
                            ctx=ctx)
            label = nd.array([_data(j * num_steps + 1) for j in batch_indices],
                             ctx=ctx)
            yield data, label

    def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None):
        """Yield ``(data, label)`` batches that are adjacent in the corpus.

        The corpus is cut into ``batch_size`` rows; successive batches are
        successive ``num_steps``-wide column slices of that matrix, and the
        label is the slice shifted right by one column.
        """
        corpus_indices = nd.array(corpus_indices, ctx=ctx)
        data_len = len(corpus_indices)
        batch_len = data_len // batch_size

        indices = corpus_indices[0:batch_size * batch_len].reshape(
            (batch_size, batch_len))
        # Subtract one because each label index is the corresponding data
        # index plus one.
        epoch_size = (batch_len - 1) // num_steps

        for i in range(epoch_size):
            i = i * num_steps
            data = indices[:, i:i + num_steps]
            label = indices[:, i + 1:i + num_steps + 1]
            yield data, label

    # Demonstrate the random sampler on a toy sequence.
    my_seq = list(range(30))

    for data, label in data_iter_random(my_seq, batch_size=2, num_steps=3):
        print('data: ', data, '\nlabel:', label, '\n')

    print(nd.one_hot(nd.array([0, 2]), vocab_size))

    def get_inputs(data):
        """Return one one-hot matrix per column of ``data`` (``data.T`` rows)."""
        return [nd.one_hot(X, vocab_size) for X in data.T]

    # `data` is the last batch produced by the demo loop above.
    inputs = get_inputs(data)
    print(data.T)

    print('input length: ', len(inputs))
    print('input[0] shape: ', inputs[0].shape)

    import mxnet as mx

    # Try to use the GPU.
    import sys
    sys.path.append('..')
    import gluonbook as gb
    ctx = gb.try_gpu()
    print('Will use', ctx)

    input_dim = vocab_size
    # Size of the hidden state.
    hidden_dim = 256
    output_dim = vocab_size
    std = .01

    # NOTE: get_params_rnn and rnn are not called anywhere in this function;
    # the final training call uses get_params and lstm_rnn.
    def get_params_rnn():
        """Create the plain-RNN parameters with gradients attached."""
        # Hidden layer.
        W_xh = nd.random_normal(scale=std,
                                shape=(input_dim, hidden_dim),
                                ctx=ctx)
        W_hh = nd.random_normal(scale=std,
                                shape=(hidden_dim, hidden_dim),
                                ctx=ctx)
        b_h = nd.zeros(hidden_dim, ctx=ctx)

        # Output layer.
        W_hy = nd.random_normal(scale=std,
                                shape=(hidden_dim, output_dim),
                                ctx=ctx)
        b_y = nd.zeros(output_dim, ctx=ctx)

        params = [W_xh, W_hh, b_h, W_hy, b_y]
        for param in params:
            param.attach_grad()
        return params

    def rnn(inputs, state, *params):
        """Run a tanh RNN over ``inputs`` and return ``(outputs, H)``."""
        # inputs: num_steps matrices of size batch_size * vocab_size.
        # H: a matrix of size batch_size * hidden_dim.
        # outputs: num_steps matrices of size batch_size * vocab_size.
        H = state
        W_xh, W_hh, b_h, W_hy, b_y = params
        outputs = []
        for X in inputs:
            H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
            Y = nd.dot(H, W_hy) + b_y
            outputs.append(Y)
        return (outputs, H)

    def get_params():
        """Create the LSTM parameters with gradients attached."""
        # Input gate parameters.
        W_xi = nd.random_normal(scale=std,
                                shape=(input_dim, hidden_dim),
                                ctx=ctx)
        W_hi = nd.random_normal(scale=std,
                                shape=(hidden_dim, hidden_dim),
                                ctx=ctx)
        b_i = nd.zeros(hidden_dim, ctx=ctx)

        # Forget gate parameters.
        W_xf = nd.random_normal(scale=std,
                                shape=(input_dim, hidden_dim),
                                ctx=ctx)
        W_hf = nd.random_normal(scale=std,
                                shape=(hidden_dim, hidden_dim),
                                ctx=ctx)
        b_f = nd.zeros(hidden_dim, ctx=ctx)

        # Output gate parameters.
        W_xo = nd.random_normal(scale=std,
                                shape=(input_dim, hidden_dim),
                                ctx=ctx)
        W_ho = nd.random_normal(scale=std,
                                shape=(hidden_dim, hidden_dim),
                                ctx=ctx)
        b_o = nd.zeros(hidden_dim, ctx=ctx)

        # Candidate cell parameters.
        W_xc = nd.random_normal(scale=std,
                                shape=(input_dim, hidden_dim),
                                ctx=ctx)
        W_hc = nd.random_normal(scale=std,
                                shape=(hidden_dim, hidden_dim),
                                ctx=ctx)
        b_c = nd.zeros(hidden_dim, ctx=ctx)

        # Output layer.
        W_hy = nd.random_normal(scale=std,
                                shape=(hidden_dim, output_dim),
                                ctx=ctx)
        b_y = nd.zeros(output_dim, ctx=ctx)

        params = [
            W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c,
            W_hy, b_y
        ]
        for param in params:
            param.attach_grad()
        return params

    def lstm_rnn(inputs, state_h, state_c, *params):
        """Run an LSTM over ``inputs`` and return ``(outputs, H, C)``."""
        # inputs: num_steps matrices of size batch_size * vocab_size.
        # H: a matrix of size batch_size * hidden_dim.
        # outputs: num_steps matrices of size batch_size * vocab_size.
        [
            W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c,
            W_hy, b_y
        ] = params

        H = state_h
        C = state_c
        outputs = []
        for X in inputs:
            I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
            F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
            O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
            C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
            C = F * C + I * C_tilda
            H = O * nd.tanh(C)
            Y = nd.dot(H, W_hy) + b_y
            outputs.append(Y)
        return (outputs, H, C)

    # NOTE: `state` and this `params` are only referenced by the disabled
    # rnn() call below.
    state = nd.zeros(shape=(data.shape[0], hidden_dim), ctx=ctx)

    params = get_params()

    #outputs, state_new = rnn(get_inputs(data.as_in_context(ctx)), state, *params)

    #print('output length: ',len(outputs))
    #print('output[0] shape: ', outputs[0].shape)
    #print('state shape: ', state_new.shape)

    def predict_rnn(rnn,
                    prefix,
                    num_chars,
                    params,
                    hidden_dim,
                    ctx,
                    idx_to_char,
                    char_to_idx,
                    get_inputs,
                    is_lstm=False):
        """Return ``prefix`` (lower-cased) continued by predicted characters."""
        # Predict the next num_chars characters following prefix.
        prefix = prefix.lower()
        state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
        if is_lstm:
            # Only needed when the RNN is an LSTM.
            state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
        output = [char_to_idx[prefix[0]]]
        for i in range(num_chars + len(prefix)):
            X = nd.array([output[-1]], ctx=ctx)
            # Carry the hidden state along the sequence.
            if is_lstm:
                # Only needed when the RNN is an LSTM.
                Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c,
                                          *params)
            else:
                Y, state_h = rnn(get_inputs(X), state_h, *params)
            # While still inside the prefix feed its next character,
            # afterwards feed the most likely predicted character.
            if i < len(prefix) - 1:
                next_input = char_to_idx[prefix[i + 1]]
            else:
                next_input = int(Y[0].argmax(axis=1).asscalar())
            output.append(next_input)
        return ''.join([idx_to_char[i] for i in output])

    def grad_clipping(params, theta, ctx):
        """Scale all gradients in place so their global L2 norm is <= theta.

        Nothing happens when ``theta`` is None.
        """
        if theta is not None:
            norm = nd.array([0.0], ctx)
            for p in params:
                norm += nd.sum(p.grad**2)
            norm = nd.sqrt(norm).asscalar()
            if norm > theta:
                for p in params:
                    p.grad[:] *= theta / norm

    from mxnet import autograd
    from mxnet import gluon
    from math import exp

    # NOTE: this local helper is not called below; the final training call
    # uses gb.train_and_predict_rnn instead.
    def train_and_predict_rnn(rnn,
                              is_random_iter,
                              epochs,
                              num_steps,
                              hidden_dim,
                              learning_rate,
                              clipping_theta,
                              batch_size,
                              pred_period,
                              pred_len,
                              seqs,
                              get_params,
                              get_inputs,
                              ctx,
                              corpus_indices,
                              idx_to_char,
                              char_to_idx,
                              is_lstm=False):
        """Train ``rnn`` with SGD and print sample predictions.

        Every ``pred_period`` epochs the perplexity and one prediction of
        ``pred_len`` characters per entry of ``seqs`` are printed.
        """
        if is_random_iter:
            data_iter = data_iter_random
        else:
            data_iter = data_iter_consecutive
        params = get_params()

        softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

        for e in range(1, epochs + 1):
            # With consecutive sampling the hidden state only has to be
            # initialised once at the start of each epoch.
            if not is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    # Only needed when the RNN is an LSTM.
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            train_loss, num_examples = 0, 0
            for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                         ctx):
                # With random sampling the hidden state has to be
                # initialised before every mini-batch.
                if is_random_iter:
                    state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                    if is_lstm:
                        # Only needed when the RNN is an LSTM.
                        state_c = nd.zeros(shape=(batch_size, hidden_dim),
                                           ctx=ctx)
                with autograd.record():
                    # outputs size: (batch_size, vocab_size)
                    if is_lstm:
                        # Only needed when the RNN is an LSTM.
                        outputs, state_h, state_c = rnn(
                            get_inputs(data), state_h, state_c, *params)
                    else:
                        outputs, state_h = rnn(get_inputs(data), state_h,
                                               *params)
                    # Let t_ib_j be element j of the batch at time step i:
                    # label size: (batch_size * num_steps)
                    # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
                    label = label.T.reshape((-1, ))
                    # Concatenate outputs, size:
                    # (batch_size * num_steps, vocab_size).
                    outputs = nd.concat(*outputs, dim=0)
                    # After the steps above outputs and label are aligned.
                    loss = softmax_cross_entropy(outputs, label)
                loss.backward()

                grad_clipping(params, clipping_theta, ctx)
                gb.SGD(params, learning_rate)

                train_loss += nd.sum(loss).asscalar()
                num_examples += loss.size
                # Running totals after every mini-batch (debug output).
                print(train_loss)
                print(num_examples)
            if e % pred_period == 0:
                print(num_examples)
                print(train_loss)
                print("Epoch %d. Perplexity %f" %
                      (e, exp(train_loss / num_examples)))
                for seq in seqs:
                    print(
                        ' - ',
                        predict_rnn(rnn, seq, pred_len, params, hidden_dim,
                                    ctx, idx_to_char, char_to_idx, get_inputs,
                                    is_lstm))
                print()

    epochs = 200
    num_steps = 15
    learning_rate = 0.1
    batch_size = 15

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    # Prefixes used for the sample predictions.
    seq1 = '胚勾'
    seq2 = '勾勒'
    seq3 = '瓶身描绘'
    seqs = [seq1, seq2, seq3]

    # Dump every consecutive batch of the real corpus (debug output).
    for data, label in data_iter_consecutive(corpus_indices, 15, 15, ctx):
        print(data)
        print(label)

    #train_and_predict_rnn(rnn=lstm_rnn, is_random_iter=False, epochs=200, num_steps=num_steps,
    # hidden_dim=hidden_dim, learning_rate=0.2,
    # clipping_theta=5, batch_size=batch_size, pred_period=20,
    # pred_len=100, seqs=seqs, get_params=get_params,
    # get_inputs=get_inputs, ctx=ctx,
    # corpus_indices=corpus_indices, idx_to_char=idx_to_char,
    # char_to_idx=char_to_idx)
    gb.train_and_predict_rnn(rnn=lstm_rnn,
                             is_random_iter=False,
                             epochs=200,
                             num_steps=15,
                             hidden_dim=hidden_dim,
                             learning_rate=0.2,
                             clipping_norm=5,
                             batch_size=15,
                             pred_period=20,
                             pred_len=100,
                             seqs=seqs,
                             get_params=get_params,
                             get_inputs=get_inputs,
                             ctx=ctx,
                             corpus_indices=corpus_indices,
                             idx_to_char=idx_to_char,
                             char_to_idx=char_to_idx,
                             is_lstm=True)