Example No. 1
    def update(self):
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.replay_buffer.sample(
            self.batch_size)
        with autograd.record():
            # get the Q(s,a)
            all_current_q_value = self.main_network(state_batch)
            main_q_value = nd.pick(all_current_q_value, action_batch)

            # get the maxQ(s',a')
            all_next_q_value = self.target_network(
                next_state_batch).detach()  # only get gradient of main network
            max_next_q_value = nd.max(all_next_q_value, axis=1)

            target_q_value = reward_batch + (
                1 - done_batch) * self.gamma * max_next_q_value

            # record loss
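            # note: L2Loss computes 0.5 * (pred - label)^2 and is symmetric, so the
            # (target, prediction) argument order below still gives the usual
            # TD-error gradient with respect to main_q_value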
            loss = gloss.L2Loss()
            value_loss = loss(target_q_value, main_q_value)
        self.main_network.collect_params().zero_grad()
        value_loss.backward()
        self.optimizer.step(batch_size=self.batch_size)
Example No. 2
def training2(features, labels, position, fire_point, title, batch_size=11):
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    loss = gloss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})
    epoch = 0
    m = 100
    while epoch < 1000:
        epoch += 1
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        l = loss(net(features), labels)
        if epoch % 100 == 0 or epoch == 1:
            print('epoch: %d  loss: %s' % (epoch, l.mean().asscalar()))
        m = l.mean().asscalar()

    print("epoch:" + str(epoch))
    print("m: " + str(m))
    print("w: " + str(net[0].weight.data().asnumpy()))
    print("b: " + str(net[0].bias.data().asnumpy()))
    print("error square: " +
          str(((net(features).reshape(batch_size, 1) -
                labels.reshape(batch_size, 1))**2).sum().asscalar()))

    p_test = nd.arange(0, 1 / fire_point, 0.1)
    q_test = net(p_test)
    plt.subplot(1, 3, position)
    plt.scatter(features.asnumpy(), labels.asnumpy(), color='#FF4700')
    plt.plot(p_test.asnumpy(), q_test.asnumpy(), color='b')
    plt.xlabel("1/p (1/kPa)")
    plt.ylabel("1/q (g carbon/mmol)")
    plt.title(title)
Example No. 3
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
             features, labels, net):
    """Optimize an objective function."""
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    loss = gloss.L2Loss()
    ls = [loss(net(features), labels).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate.
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(loss(net(features), labels).mean().asnumpy())
    # To print more conveniently, use numpy.
    print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    semilogy(es, ls, 'epoch', 'loss')
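Example No. 4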
def train_gluon_ch7(trainer_name,
                    trainer_hyperparams,
                    features,
                    labels,
                    batch_size=10,
                    num_epochs=2):
    # Initialize the model
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()

    # Helper to evaluate the loss on the full training set
    def eval_loss():
        return loss(net(features), labels).mean().asscalar()

    ls = [eval_loss()]
    data_iter = gdata.DataLoader(gdata.ArrayDataset(features, labels),
                                 batch_size,
                                 shuffle=True)
    # Create Trainer
    trainer = gluon.Trainer(net.collect_params(), trainer_name,
                            trainer_hyperparams)
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)  # Average the gradient
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    # Print result and graph
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    d2l.set_figsize()
    d2l.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    d2l.plt.xlabel('epoch')
    d2l.plt.ylabel('loss')
Example No. 5
def house_prise_gulon():
    """
    Build a house price estimator with a Gluon model
    :return:
    """
    features = nd.array([[120, 2], [100, 1], [130, 3]])
    labels = nd.array([1200000, 1000000, 1300000])
    logger.info(features)
    logger.info(labels)
    # labels += nd.random.normal(scale=0.01, shape=labels.shape)

    batch_size = 10
    # Combine the features and labels of the training data
    dataset = gdata.ArrayDataset(features, labels)
    # Read mini-batches at random
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.03})
    num_epochs = 3
    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        l = loss(net(features), labels)
        logger.info('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))

    dense = net[0]
    logger.info("预测数据")
    logger.info(dense.weight.data())
    logger.info(dense.bias.data())

    logger.info(net(features))
Example No. 6
    def update(self):
        state_batch, action_batch, reward_batch, next_state_batch = self.replay_buffer.sample(
            self.batch_size)
        with autograd.record():
            # get the Q(s,a)
            all_current_q_value = self.main_network(state_batch)
            main_q_value = nd.pick(all_current_q_value, action_batch)

            # different from DQN
            # get next action from main network, then get its Q value from target network
            all_next_q_value = self.target_network(
                next_state_batch).detach()  # only get gradient of main network
            max_action = nd.argmax(all_current_q_value, axis=1)
            target_q_value = nd.pick(all_next_q_value, max_action).detach()

            target_q_value = reward_batch + self.gamma * target_q_value

            # record loss
            loss = gloss.L2Loss()
            value_loss = loss(target_q_value, main_q_value)
        self.main_network.collect_params().zero_grad()
        value_loss.backward()
        self.optimizer.step(batch_size=self.batch_size)
Example No. 7
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
             features, labels, net):
    i = 0
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    loss = gloss.L2Loss()
    ls = [loss(net(features), labels).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate.
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            i += 1
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(loss(net(features), labels).mean().asnumpy())
    # For convenient printing, convert the output to NumPy arrays.
    print('i:', i, 'w:', net[0].weight.data(), '\nb:', net[0].bias.data(),
          '\n')
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    gb.semilogy(es, ls, 'epoch', 'loss')
Example No. 8
def train_gluon(trainer_name, hyperparams, batch_size):
    # [Load the data]
    data_iter = gdata.DataLoader(gdata.ArrayDataset(features, labels),
                                 batch_size,
                                 shuffle=True)
    # [Define the model]
    net = nn.Sequential()
    net.add(nn.Dense(1))
    # [Initialize the model parameters]
    net.initialize(init=init.Normal(sigma=0.01))
    # [Define the loss function]
    loss = gloss.L2Loss()
    # [Define the optimization algorithm]
    trainer = gluon.Trainer(net.collect_params(), trainer_name, hyperparams)
    # [Train the model]

    num_epochs = 2
    start = 0
    ls = []
    for _ in range(num_epochs):
        start = time.time()
        for epoch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            if (epoch_i + 1) * batch_size % 100 == 0:
                ls.append(loss(net(features), labels).mean().asscalar())
    print('loss %f,%f sec per epoch' % (ls[-1],
                                        (time.time() - start) / len(ls)))

    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
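Example No. 9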
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
             net):
    # num_examples = 1000
    # X, y = genData(num_examples)
    dataset = gdata.ArrayDataset(X, y)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    square_loss = gloss.L2Loss()

    y_vals = [square_loss(net(X), y).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (features, label) in enumerate(data_iter):
            with autograd.record():
                output = net(features)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(square_loss(net(X), y).mean().asnumpy())
    # For convenient printing, convert the output to NumPy arrays.
    print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
Example No. 10
def optimize_gluon(trainer, features, labels, net, decay_epoch=None,
                   batch_size=10, log_interval=10, num_epochs=3):
    """Optimize an objective function with a Gluon trainer."""
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    loss = gloss.L2Loss()
    ls = [loss(net(features), labels).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate.
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(loss(net(features), labels).mean().asnumpy())
    print('w[0]=%.2f, w[1]=%.2f, b=%.2f'
          % (net[0].weight.data()[0][0].asscalar(),
             net[0].weight.data()[0][1].asscalar(),
             net[0].bias.data().asscalar()))
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    semilogy(es, ls, 'epoch', 'loss')
Example No. 11
    def update(self):
        state = nd.array([t.state for t in self.buffer], ctx=self.ctx)
        action = nd.array([t.action for t in self.buffer], ctx=self.ctx)
        reward = [t.reward for t in self.buffer]
        # next_state = nd.array([t.next_state for t in self.buffer], ctx=self.ctx)
        old_action_log_prob = nd.array([t.a_log_prob for t in self.buffer],
                                       ctx=self.ctx)

        R = 0
        Gt = []
        for r in reward[::-1]:
            R = r + self.gamma * R
            Gt.insert(0, R)
        Gt = nd.array(Gt, ctx=self.ctx)
        # sample 'ppo_update_time' times
        # sample 'batch_size' samples every time
        for i in range(self.ppo_update_times):
            assert len(self.buffer) >= self.batch_size
            sample_index = random.sample(range(len(self.buffer)),
                                         self.batch_size)
            for index in sample_index:
                # optimize the actor network
                with autograd.record():
                    Gt_index = Gt[index]
                    V = self.critic_network(state[index].reshape(1,
                                                                 -1)).detach()
                    advantage = (Gt_index - V)

                    all_action_prob = self.actor_network(state[index].reshape(
                        1, -1))
                    action_prob = nd.pick(all_action_prob, action[index])

                    ratio = action_prob / old_action_log_prob[index]
                    surr1 = ratio * advantage
                    surr2 = nd.clip(ratio, 1 - self.clip_param,
                                    1 + self.clip_param) * advantage
                    action_loss = -nd.mean(nd.minimum(surr1,
                                                      surr2))  # attention
                self.actor_network.collect_params().zero_grad()
                action_loss.backward()
                actor_network_params = [
                    p.data()
                    for p in self.actor_network.collect_params().values()
                ]
                gb.grad_clipping(actor_network_params,
                                 theta=self.clip_param,
                                 ctx=self.ctx)
                self.actor_optimizer.step(1)

                # optimize the critic network
                with autograd.record():
                    Gt_index = Gt[index]
                    V = self.critic_network(state[index].reshape(1, -1))
                    loss = gloss.L2Loss()
                    value_loss = nd.mean(loss(Gt_index, V))
                self.critic_network.collect_params().zero_grad()
                value_loss.backward()
                critic_network_params = [
                    p.data()
                    for p in self.critic_network.collect_params().values()
                ]
                gb.grad_clipping(critic_network_params,
                                 theta=self.clip_param,
                                 ctx=self.ctx)
                self.critic_optimizer.step(1)

                self.training_step += 1
        # clear buffer
        del self.buffer[:]
Example No. 12
 def __init__(self, net, ctx=mx.cpu()):
     super(Regression, self).__init__(net=net, ctx=ctx)
     self.loss_fun = gloss.L2Loss()
     self.metric = mx.metric.PearsonCorrelation()
Example No. 13
    def update(self):
        self.total_train_steps += 1
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory_buffer.sample(
            self.batch_size)

        # --------------optimize the critic network--------------------
        with autograd.record():
            # choose next action according to target policy network
            next_action_batch = self.target_actor_network(next_state_batch)
            noise = nd.normal(loc=0,
                              scale=self.policy_noise,
                              shape=next_action_batch.shape,
                              ctx=self.ctx)
            # with noise clip
            noise = nd.clip(noise,
                            a_min=-self.noise_clip,
                            a_max=self.noise_clip)
            next_action_batch = next_action_batch + noise
            clipped_action = self.action_clip(next_action_batch)

            # get target q value
            target_q_value1 = self.target_critic_network1(
                next_state_batch, clipped_action)
            target_q_value2 = self.target_critic_network2(
                next_state_batch, clipped_action)
            target_q_value = nd.minimum(target_q_value1,
                                        target_q_value2).squeeze()
            target_q_value = reward_batch + (1.0 - done_batch) * (
                self.gamma * target_q_value)

            # get current q value
            current_q_value1 = self.main_critic_network1(
                state_batch, action_batch)
            current_q_value2 = self.main_critic_network2(
                state_batch, action_batch)
            loss = gloss.L2Loss()

            value_loss1 = loss(current_q_value1, target_q_value.detach())
            value_loss2 = loss(current_q_value2, target_q_value.detach())

        self.main_critic_network1.collect_params().zero_grad()
        value_loss1.backward()
        self.critic1_optimizer.step(self.batch_size)

        self.main_critic_network2.collect_params().zero_grad()
        value_loss2.backward()
        self.critic2_optimizer.step(self.batch_size)

        # ---------------optimize the actor network-------------------------
        if self.total_train_steps % self.policy_update == 0:
            with autograd.record():
                pred_action_batch = self.main_actor_network(state_batch)
                actor_loss = -nd.mean(
                    self.main_critic_network1(state_batch, pred_action_batch))

            self.main_actor_network.collect_params().zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step(1)

            self.soft_update(self.target_actor_network,
                             self.main_actor_network)
            self.soft_update(self.target_critic_network1,
                             self.main_critic_network1)
            self.soft_update(self.target_critic_network2,
                             self.main_critic_network2)
Example No. 14
from mxnet.gluon import loss

loss_softmax = loss.SoftmaxCrossEntropyLoss(sparse_label=False)

loss_mse = loss.L2Loss()
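A minimal usage sketch of the two loss objects above (toy arrays assumed; they are not part of the original project): with sparse_label=False the label is passed as a full distribution such as a one-hot vector, while L2Loss compares real-valued predictions and targets directly.

from mxnet import nd
from mxnet.gluon import loss

loss_softmax = loss.SoftmaxCrossEntropyLoss(sparse_label=False)
loss_mse = loss.L2Loss()

pred = nd.array([[2.0, 0.5], [0.1, 3.0]])
onehot = nd.array([[1.0, 0.0], [0.0, 1.0]])
print(loss_softmax(pred, onehot))  # one cross-entropy value per sample

y_hat = nd.array([1.0, 2.0])
y = nd.array([1.5, 2.0])
print(loss_mse(y_hat, y))  # 0.5 * (y_hat - y)^2 per sample -> [0.125, 0.]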
Example No. 15
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

batch_size = 10
dataset = gdata.ArrayDataset(features, labels)
# Read mini-batches at random
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(1))  # Dense defines this layer with 1 output; Dense is a fully connected layer

# Initialize the model parameters
net.initialize(init.Normal(sigma=0.01))  # each weight element is randomly sampled at initialization from a normal distribution with mean 0 and standard deviation 0.01

# Define the loss function
loss = gloss.L2Loss()  # the squared loss is also known as the L2 norm loss

# Define the optimization algorithm; the learning rate is generally set on the order of 1/batch_size
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})  # mini-batch stochastic gradient descent (sgd) with a learning rate of 0.03; these parameters can be obtained via the collect_params function

# Train the model
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)  # update the model parameters; specifying the batch size averages the gradients over the samples in the batch
    l = loss(net(features), labels)
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
Example No. 16
batch_size = 10

dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

from mxnet.gluon import nn

net = nn.Sequential()  # create a Sequential container
net.add(nn.Dense(1))  # the number of inputs is inferred automatically

from mxnet import init
net.initialize(init.Normal(sigma=0.01))  # initial

from mxnet.gluon import loss as gloss
loss = gloss.L2Loss()  # L2 loss

from mxnet import gluon
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

num_epoch = 3
for epoch in range(1, num_epoch + 1):

    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    print('epoch {0} , loss {1}'.format(
        epoch,
        loss(net(features), labels).mean().asnumpy()))
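Example No. 17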
 def __init__(self, **kwargs):
     super(PolynomialRegressionGluon, self).__init__(**kwargs)
     self.net = None
     self.loss = gloss.L2Loss()
     self.regularization = kwargs.get("regularization")
Example No. 18
    def __init__(self, feature_dict, args, ctx, task, **kwargs):
        """{"sparse":[SingleFeat],"dense":[SingleFeat]}"""
        super(xDeepFM, self).__init__(**kwargs)  # ??
        util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
        # self.feature_sizes = args.FEATURE_SIZE
        self.field_size = args.FIELD_NUM
        self.feature_dict = feature_dict
        print('field_size:')
        print(self.field_size)
        if args.TASK == 'finish':
            self.embedding_size = args.FINISH_EMBEDDING_SIZE
            self.batch_size = args.FINISH_BATCH_SIZE
        else:
            self.embedding_size = args.LIKE_EMBEDDING_SIZE
            self.batch_size = args.LIKE_BATCH_SIZE
        self.config_name = args.CONFIG_NAME
        # self.dropout_prob = args.DROPOUT_PROB
        self.task = task

        # self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
        if args.LOSS == 'l2loss':
            self.loss = gloss.L2Loss()
        else:
            self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
        self.ctx = ctx
        self.embedding_dict = OrderedDict()
        self.dense_dict = OrderedDict()
        with self.name_scope():
            if self.task == 'finish':
                self.layer_list = [np.int(x) for x in args.FINISH_LAYER]
                self.dropout = args.FINISH_DROPOUT_PROB
            else:
                self.layer_list = [np.int(x) for x in args.LIKE_LAYER]
                self.dropout = args.LIKE_DROPOUT_PROB
            # self.params.get('v',shape=(self.field_size,self.embedding_size))
            self.dnn_out = nn.Dense(1, use_bias=False)

            self.register_child(self.dnn_out)

            for feat in feature_dict['sparse']:
                self.embedding_dict[feat.feat_name] = nn.Embedding(
                    feat.feat_num, self.embedding_size)

            for feat in feature_dict['dense']:
                self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)

            for emb_k, emb_v in self.embedding_dict.items():
                self.register_child(emb_v)
            for den_k, den_v in self.dense_dict.items():
                self.register_child(den_v)
            self.linear_logit_dense = nn.Dense(1, use_bias=False)
            self.register_child(self.linear_logit_dense)
            self.linear_logit_embedding_bn = nn.BatchNorm()
            self.register_child(self.linear_logit_embedding_bn)
            self.dense_list = []
            self.dropout_list = []
            self.bn_list = []
            self.activation_list = []
            for i in range(len(self.layer_list)):
                self.dense_list.append(nn.Dense(self.layer_list[i]))
                self.dropout_list.append(nn.Dropout(self.dropout))
                self.bn_list.append(nn.BatchNorm())
                self.activation_list.append(nn.Activation('relu'))
                self.register_child(self.dense_list[i])
                self.register_child(self.dropout_list[i])
                self.register_child(self.bn_list[i])
                self.register_child(self.activation_list[i])
            # if True:
            print('true')
            self.layer_size = [np.int(x) for x in args.CONV1D_LAYER]
            # self.cin_net = CIN(self.embedding_size,self.field_size, (128, 64), self.ctx)
            # print('oo')
            # self.cin_net.initialize()
            # print('uu')
            # self.register_child(self.cin_net)

            self.cin_dense = nn.Dense(1)
            self.register_child(self.cin_dense)
            self.cin_bn = nn.BatchNorm()
            self.register_child(self.cin_bn)

            self.field_nums = [self.field_size]
            self.conv_list = []
            for idx, size in enumerate(self.layer_size):
                self.conv_list.append(
                    nn.Conv1D(channels=size,
                              kernel_size=1,
                              strides=1,
                              padding=0,
                              activation='relu',
                              in_channels=self.field_nums[0] *
                              self.field_nums[-1],
                              weight_initializer=init.Uniform()))
                self.field_nums.append(size)
                self.register_child(self.conv_list[idx])
Example No. 19
# 1. prepare training data set
n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5
features = nd.random.normal(shape=(n_train + n_test, 1))
poly_features = nd.concat(features, nd.power(features, 2),
                          nd.power(features, 3))
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1] +
          true_w[2] * poly_features[:, 2] + true_b)
labels += nd.random.normal(loc=0, scale=0.01, shape=labels.shape)

print(features[:2], poly_features[:2], labels[:2])
print("features.shape={}, poly_features.shape={} ".format(
    features.shape, poly_features.shape))
print("labels.shape={}".format(labels.shape))

num_epochs, loss = 100, gloss.L2Loss()


# 2. training model
def fit_and_plot(train_features, test_features, train_labels, test_labels):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    batch_size = min(10, train_labels.shape[0])
    train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features,
                                                     train_labels),
                                  batch_size,
                                  shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})
    train_ls, test_ls = [], []
Example No. 20
modelname = 'semi_pi_simple2'
basemodel_zoo = 'simple2'
net = symbols.get_model('simple2')
net.initialize(mx.init.Xavier(magnitude=2.24))

#net.load_parameters(os.path.join('symbols','para','%s.params'%(modelname)))


# g(x) : stochastic input augmentation function
def g(x):
    return x + nd.random.normal(0, stochastic_ratio, shape=x.shape)


# loss function
l_logistic = gloss.SoftmaxCrossEntropyLoss()
l_l2loss = gloss.L2Loss()
metric = mx.metric.Accuracy()


# train
def test():
    metric = mx.metric.Accuracy()
    for data, label in val_data:
        X = data.reshape((-1, 1, 28, 28))
        #img = nd.concat(X,X,X,dim=1)
        output = net(X)
        metric.update([label], [output])
    return metric.get()


def train(epochs, alpha):
Example No. 21
def log_rmse(net, features, labels):
    l2_loss = gloss.L2Loss()
    clipped_preds = nd.clip(net(features), 1, float('inf'))
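    # gloss.L2Loss returns 0.5 * (pred - label)^2, so the factor of 2 below
    # recovers the plain squared log error before the square root is taken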
    rmse = nd.sqrt(2 * l2_loss(clipped_preds.log(), labels.log()).mean())
    return rmse
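Example No. 22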
 def __init__(self):
     super(LinRegGluon, self).__init__()
     self.net = None
     if not self.loss:
         self.loss = g_loss.L2Loss()
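Example No. 23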
import mxnet
from mxnet.gluon import nn, loss as gloss

import mxnet as mx
import mxnet.ndarray as nd
from mxnet import nd, autograd, gluon
from mxnet.gluon.data.vision import transforms

# L2 Loss
loss2 = gloss.L2Loss()

# sample data
x = nd.ones((2, ))
y = nd.ones((2, )) * 2
loss2(x, y)
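# each element of x differs from y by 1, so L2Loss gives 0.5 * 1^2 = 0.5 per sample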

# Huber loss
loss_huber = gloss.HuberLoss(rho=0.85)  # threshold rho

loss = gloss.SoftmaxCrossEntropyLoss()
x = nd.array([[1, 10], [8, 2]])
y = nd.array([0, 1])
loss(x, y)
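Example No. 24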
# Fully connected layer

net.add(nn.Dense(1))

# [Initialize the model parameters]

# By default the weight parameters are initialized from a normal distribution with a standard deviation of 0.01
# The bias parameter is initialized to 0 by default

# At this point net already has w and b; afterwards we only need to feed the inputs into the network
# Calling net(X) is exactly the process of computing the predictions!
net.initialize(init.Normal(sigma=0.01))

# [Define the loss function]

loss = loss.L2Loss()  # the squared loss is also known as the L2 norm loss

# [Define the optimization algorithm]

# Defining a parameter optimization algorithm requires four things: w, b, learning_rate and batch_size
# The batch_size is supplied at the end, in the step function!
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

# [Train the model]

num_epochs = 3

for epoch in range(num_epochs):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
Example No. 25
    def build_model(self):
        # DataLoader
        train_transform = transforms.Compose([
            transforms.RandomFlipLeftRight(),
            transforms.Resize((self.img_size + 30, self.img_size + 30)),
            transforms.RandomResizedCrop(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        test_transform = transforms.Compose([
            transforms.Resize((self.img_size, self.img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

        self.trainA = ImageFolder(
            os.path.join('dataset', self.dataset, 'trainA'), train_transform)
        self.trainB = ImageFolder(
            os.path.join('dataset', self.dataset, 'trainB'), train_transform)
        self.testA = ImageFolder(
            os.path.join('dataset', self.dataset, 'testA'), test_transform)
        self.testB = ImageFolder(
            os.path.join('dataset', self.dataset, 'testB'), test_transform)
        self.trainA_loader = DataLoader(self.trainA,
                                        batch_size=self.batch_size,
                                        shuffle=True,
                                        num_workers=self.num_workers)
        self.trainB_loader = DataLoader(self.trainB,
                                        batch_size=self.batch_size,
                                        shuffle=True,
                                        num_workers=self.num_workers)
        self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
        self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
        """ Define Generator, Discriminator """
        self.genA2B = ResnetGenerator(input_nc=3,
                                      output_nc=3,
                                      ngf=self.ch,
                                      n_blocks=self.n_res,
                                      img_size=self.img_size,
                                      light=self.light)
        self.genB2A = ResnetGenerator(input_nc=3,
                                      output_nc=3,
                                      ngf=self.ch,
                                      n_blocks=self.n_res,
                                      img_size=self.img_size,
                                      light=self.light)
        self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
        self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
        self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
        self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)

        self.whole_model = nn.HybridSequential()
        self.whole_model.add(*[
            self.genA2B, self.genB2A, self.disGA, self.disGB, self.disLA,
            self.disLB
        ])

        self.whole_model.hybridize(static_alloc=False, static_shape=False)
        """ Define Loss """
        self.L1_loss = gloss.L1Loss()
        self.MSE_loss = gloss.L2Loss(weight=2)
        self.BCE_loss = gloss.SigmoidBCELoss()
        """ Initialize Parameters"""
        params = self.whole_model.collect_params()
        block = self.whole_model
        if not self.debug:
            force_init(block.collect_params('.*?_weight'), KaimingUniform())
            force_init(block.collect_params('.*?_bias'),
                       BiasInitializer(params))
            block.collect_params('.*?_rho').initialize()
            block.collect_params('.*?_gamma').initialize()
            block.collect_params('.*?_beta').initialize()
            block.collect_params('.*?_state_.*?').initialize()
        else:
            pass
        block.collect_params().reset_ctx(self.dev)
        """ Trainer """
        self.G_params = param_dicts_merge(
            self.genA2B.collect_params(),
            self.genB2A.collect_params(),
        )
        self.G_optim = gluon.Trainer(
            self.G_params,
            'adam',
            dict(learning_rate=self.lr,
                 beta1=0.5,
                 beta2=0.999,
                 wd=self.weight_decay),
        )
        self.D_params = param_dicts_merge(self.disGA.collect_params(),
                                          self.disGB.collect_params(),
                                          self.disLA.collect_params(),
                                          self.disLB.collect_params())
        self.D_optim = gluon.Trainer(
            self.D_params,
            'adam',
            dict(learning_rate=self.lr,
                 beta1=0.5,
                 beta2=0.999,
                 wd=self.weight_decay),
        )
        """ Define Rho clipper to constraint the value of rho in AdaILN and ILN"""
        self.Rho_clipper = RhoClipper(0, 1)
Example No. 26
    break



from mxnet.gluon import nn
net = nn.Sequential()
net.add(nn.Dense(1))


from mxnet import init

net.initialize(init.Normal(sigma=0.01))

from mxnet.gluon import loss as gloss

loss = gloss.L2Loss()  # the squared loss is also known as the L2 norm loss

from mxnet import gluon

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})


num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    l = loss(net(features), labels)
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
Example No. 27
def train_GAS_ch9(model, data_utils, batch_size, lr, num_epochs, ctx):
    model.initialize(init.Xavier(), force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': lr})
    loss = d2l.MaskedSoftmaxCELoss()
    loss1 = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    loss2 = gloss.L2Loss()
    animator = d2l.Animator(xlabel='epoch',
                            ylabel='loss',
                            xlim=[1, num_epochs],
                            ylim=[0, 0.25])
    for epoch in range(1, num_epochs + 1):
        timer = d2l.Timer()
        metric = d2l.Accumulator(2)  # loss_sum, num_tokens
        l_sum, l_class_sum, l_score_sum, n, acc_sum = 0.0, 0.0, 0.0, 0, 0.0
        rmse_sum = nd.array([0, 0, 0])
        data_iter = data_utils.get_batch_train(batch_size)
        # ti=0

        for i, (X, Y) in enumerate(data_iter):
            # print(X.shape,Y.shape)               ##X=(128, 10, 16) (128, 5)
            # exit()
            X, Y = nd.array(X).as_np_ndarray(), nd.array(Y).as_np_ndarray()
            # print("after reshape",X.shape, Y.shape)         ##after reshape (128, 10, 16) (128, 5)
            # exit()
            # vlinz=nd.random.randint(0,10,(X.shape[0],)).as_np_ndarray()
            # vlinz=nd.ones((X.shape[0],)).as_np_ndarray()
            valid_len = np.repeat(np.array([X.shape[1]]), X.shape[0])
            # print(valid_len.shape)                       ##(128,)
            # exit()
            # ti+=1
            # print('keepup',ti)
            with autograd.record():
                dec_output = model(X, valid_len)
                # print(dec_output.shape)             ##(128, 10, 5)
                # exit()
                #         ###################
                #         l = loss(dec_output, Y, vlinz)
                #     l.backward()
                #     d2l.grad_clipping(model, 1)
                #     num_tokens = vlinz.sum()
                #     trainer.step(num_tokens)
                #     metric.add(l.sum(), num_tokens)
                #     # exit()
                # if epoch % 10 == 0:
                #     animator.add(epoch, (metric[0] / metric[1],))
                # print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} '
                #       f'tokens/sec on {str(ctx)}')
                #         ##########################
                output = dec_output.as_nd_ndarray()
                cl_res, score_res = class_and_score_forward(output)
                # print("shape of cl_res:",cl_res.shape,"shape of Y[0][:,:3]:",Y.shape)
                # print("shape of score_res:",score_res.shape,"shape of Y[0][:,3:]:",Y.shape)
                cl_weight, conc_weight = nd.ones_like(cl_res), nd.ones_like(
                    score_res)
                l_class = loss1(cl_res.as_np_ndarray(), Y[:, :3],
                                cl_weight.as_np_ndarray()).sum()
                l_conc = loss2(score_res.as_np_ndarray(), Y[:, 3:],
                               conc_weight.as_np_ndarray()).sum()
                n = Y.shape[0]
                l = (l_class / n) + (l_conc / n)
            l.backward()
            d2l.grad_clipping(model, 1)
            num_tokens = n
            trainer.step(num_tokens)
            metric.add(l.sum(), num_tokens)
        if epoch % 10 == 0:
            animator.add(epoch, (metric[0] / metric[1], ))
    print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} '
          f'tokens/sec on {str(ctx)}')
Example No. 28
    return gluon.data.DataLoader(dataset,batch_size,shuffle=is_train)


if (__name__=='__main__'):
    true_w=nd.array([2,-3.4])
    true_b=4.2
    features,labels=synthetic_data(true_w,true_b,1000)

    batch_size=10
    data_iter=load_array((features,labels),batch_size)

    net=nn.Sequential()
    net.add(nn.Dense(1))

    net.initialize(init.Normal(sigma=0.01))
    loss=gloss.L2Loss() #The squared loss is known as the L2 norm loss
    trainer=gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.03})

    num_epochs=3
    for epoch in range(1,num_epochs+1):
        for X,y in data_iter:
            with autograd.record():
                l=loss(net(X),y)
            l.backward()
            trainer.step(batch_size)
        l=loss(net(features),labels)
        print('epoch %d,loss: %f'%(epoch,l.mean().asnumpy()))

    print('After regression, w is ',net[0].weight.data())
    print('After regression, b is ',net[0].bias.data())
Example No. 29
             y2_vals=None,
             legend=None,
             figsize=(3.5, 2.5)):
    gb.plt.rcParams['figure.figsize'] = figsize
    set_matplotlib_formats('retina')
    gb.plt.xlabel(x_label)
    gb.plt.ylabel(y_label)
    gb.plt.semilogy(x_vals, y_vals)
    if x2_vals and y2_vals:
        gb.plt.semilogy(x2_vals, y2_vals)
        gb.plt.legend(legend)
    gb.plt.show()


num_epochs = 100
loss = gloss.L2Loss()


def fit_and_plot(train_features, test_features, train_labels, test_labels):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    batch_size = min(100, train_labels.shape[0])
    train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features,
                                                     train_labels),
                                  batch_size,
                                  shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})

    train_ls, test_ls = [], []
Example No. 30
def main() -> None:
    """
    Main execution of the module.
    """
    # Setup the same initial constraints as our previous linear regression model.
    true_weights = np.array([2, -3.4])
    true_bias = 4.2
    features, targets = d2l.synthetic_data(true_weights, true_bias, 1000)

    batch_size = 10
    data_iterator = load_array((features, targets), batch_size, True)

    # Create a sequential neural network with one output layer. Gluon will infer
    # the input shape the first time data is passed through to it.
    net = nn.Sequential()
    net.add(nn.Dense(1))

    # Initialize the weights with a random sample from a normal distribution
    # with a mean of 0 and a standard deviation of 0.01. The bias is initialized to
    # zero by default. The initialization is deferred until the first attempt to pass
    # data through the network.
    net.initialize(init.Normal(sigma=0.01))

    # The squared loss is also known as the L2 norm loss.
    l2_loss = loss.L2Loss()

    # Setup our SGD optimizer through the trainer class.
    trainer = gluon.Trainer(net.collect_params(), "sgd",
                            {"learning_rate": 0.03})

    num_epochs = 3

    # Training loop time
    for epoch in range(1, num_epochs + 1):
        for feature_batch, target_batch in data_iterator:
            with autograd.record():
                predicted_targets = net(feature_batch)
                batch_loss = l2_loss(predicted_targets, target_batch)

            # Compute the gradients for all of our weights and bias. The trainer
            # initialized the parameters for us already, allowing us to not worry
            # about manually attaching gradients.
            batch_loss.backward()

            # Because we're passing in a number of batches, we need to compute
            # reduction of all gradients in order to update our model
            # accordingly.
            trainer.step(batch_size)

        # Compute the overall loss for the epoch.
        epoch_loss = l2_loss(net(features), targets)
        print(f"epoch {epoch}, loss: {epoch_loss.mean().asnumpy()}")

    # Obtain the weights and biases from the first (and only) layer inside of
    # our model.
    first_layer_weights = net[0].weight.data()
    first_layer_bias = net[0].bias.data()

    print(
        f"Error in estimating the weights: {true_weights.reshape(first_layer_weights.shape) - first_layer_weights}"
    )
    print(f"Error in estimating the bias: {true_bias - first_layer_bias}")