Example #1
def training_full(network, batch_size, epochs, loss_fn, optimizer, data, wo,
                  ho):
    train_features, train_labels, val_features, val_labels = data
    num_batches = train_features.shape[0] // batch_size
    train_acc = metric.Accuracy()
    val_acc = metric.Accuracy()
    t = trange(epochs, leave=True)
    for e in t:
        final_loss = 0
        for i in range(num_batches):
            batch_X = mx.nd.array(train_features[i * batch_size:(i + 1) *
                                                 batch_size],
                                  ctx=context).astype('float32')
            batch_Y = mx.nd.array(train_labels[i * batch_size:(i + 1) *
                                               batch_size],
                                  ctx=context).astype('int64')
            # flag=0: take an optimization step and return the updated network
            network, loss = training_step(batch_X, batch_Y, optimizer, loss_fn,
                                          network, wo, ho, batch_size, 0)
            final_loss += loss.mean().asscalar()
            # flag=1: forward pass only, returning labels and outputs
            # for the accuracy metric
            l, o = training_step(batch_X, batch_Y, optimizer, loss_fn, network,
                                 wo, ho, batch_size, 1)
            train_acc.update(l, o)
        validation_loss = validate(network, data, wo, ho, loss_fn)
        l1, o1 = validate(network, data, wo, ho, loss_fn, flag=1)
        val_acc.update(l1, o1)
    return (network, final_loss, train_acc.get()[1], validation_loss,
            val_acc.get()[1])
Example #2
def _get_mxnet_metrics(train_config):
    metrics_mxnet = [
        metric.MSE(name='value_loss',
                   output_names=['value_output'],
                   label_names=['value_label']),
        metric.CrossEntropy(name='policy_loss',
                            output_names=['policy_output'],
                            label_names=['policy_label']),
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
        metric.Accuracy(axis=1,
                        name='policy_acc',
                        output_names=['policy_output'],
                        label_names=['policy_label'])
    ]
    if train_config.use_wdl:
        metrics_mxnet.append(
            metric.CrossEntropy(name='wdl_loss',
                                output_names=['wdl_output'],
                                label_names=['wdl_label']))
        metrics_mxnet.append(
            metric.Accuracy(axis=1,
                            name='wdl_acc',
                            output_names=['wdl_output'],
                            label_names=['wdl_label']))
    if train_config.use_plys_to_end:
        metrics_mxnet.append(
            metric.MSE(name='plys_to_end_loss',
                       output_names=['plys_to_end_output'],
                       label_names=['plys_to_end_label']))
    return metrics_mxnet
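The `output_names`/`label_names` bindings let a multi-output `mx.mod.Module` route each head's predictions and labels to the right metric. A minimal sketch of how such a list is typically consumed; `model` and `val_iter` are illustrative placeholders, not names from this example:

# Hedged sketch: assumes a bound and initialized multi-head Module `model`
# and a matching validation iterator `val_iter`.
metrics = _get_mxnet_metrics(train_config)
for name, value in model.score(val_iter, metrics):
    print("%s: %.4f" % (name, value))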
Example #3
def _get_gluon_metrics(train_config):
    metrics_gluon = {
        'value_loss':
        metric.MSE(name='value_loss', output_names=['value_output']),
        'value_acc_sign':
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
    }
    if train_config.sparse_policy_label:
        # the default cross entropy only supports sparse labels
        metrics_gluon['policy_loss'] = metric.CrossEntropy(
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.Accuracy(
            axis=1,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])
    else:
        metrics_gluon['policy_loss'] = metric.create(
            cross_entropy,
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.create(
            acc_distribution,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])
    return metrics_gluon
Example #4
def validate(network, validation_dataloader):
    """
    Should compute the accuracy of the network on the validation set.
    
    :param network: initialized gluon network to be trained
    :type network: gluon.Block
    
    :param validation_dataloader: the validation DataLoader provides batches of data for every iteration
    :type validation_dataloader: gluon.data.DataLoader
    
    :return: validation accuracy
    :rtype: float
    """
    
    validation_accuracy = metric.Accuracy()
    
    # validation loop
    for data, label in validation_dataloader:
        output = network(data)
        validation_accuracy.update(label, output)
    
    validation_accuracy = validation_accuracy.get()[1]
    
    return validation_accuracy
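For context, a minimal sketch of calling this `validate` function end to end; the small dense network and FashionMNIST dataset here are assumptions for illustration, not part of the example:

import mxnet as mx
from mxnet import gluon
from mxnet.gluon.data.vision import datasets, transforms

# Hypothetical setup: a small dense network and the FashionMNIST test split.
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(128, activation='relu'), gluon.nn.Dense(10))
net.initialize()

val_ds = datasets.FashionMNIST(train=False).transform_first(
    transforms.ToTensor())
val_loader = gluon.data.DataLoader(val_ds, batch_size=128)

print(validate(net, val_loader))  # ~0.1 for an untrained 10-class network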
Example #5
def validate(net, val_data, ctx, loss, plot=False):
    metric = mtc.Accuracy()
    val_loss = 0
    ebs = []
    lbs = []
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0],
                                          ctx_list=ctx,
                                          batch_axis=0,
                                          even_split=False)
        labels = gluon.utils.split_and_load(batch[1],
                                            ctx_list=ctx,
                                            batch_axis=0,
                                            even_split=False)

        ots = [net(X) for X in data]
        embedds = [ot[0] for ot in ots]
        outputs = [ot[1] for ot in ots]

        losses = [loss(yhat, y) for yhat, y in zip(outputs, labels)]
        metric.update(labels, outputs)
        val_loss += sum([l.mean().asscalar() for l in losses]) / len(losses)
        if plot:
            for es, ls in zip(embedds, labels):
                assert len(es) == len(ls)
                for idx in range(len(es)):
                    ebs.append(es[idx].asnumpy())
                    lbs.append(ls[idx].asscalar())
    if plot:
        ebs = np.vstack(ebs)
        lbs = np.hstack(lbs)

    _, val_acc = metric.get()
    return val_acc, val_loss / len(val_data), ebs, lbs
Example #6
def train(network, training_dataloader, batch_size, epochs):

    # Define training metrics
    train_acc = metric.Accuracy()

    # Define the loss function (SoftmaxCrossEntropyLoss, as in the other
    # train() examples) and the optimizer with its learning rate
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(network.collect_params(), 'adam',
                            {'learning_rate': 0.002})

    # Training loop: feed forward, then back-propagate the error
    # to update the respective weights
    for epoch in range(epochs):
        train_loss = 0
        for data, label in training_dataloader:
            with autograd.record():
                output = network(data)
                loss = loss_fn(output, label)
            loss.backward()
            trainer.step(batch_size)

            train_loss += loss.mean().asscalar()
            train_acc.update(label, output)

        # Print epoch, average loss and accuracy after every epoch
        print(
            "Epoch(%d) Loss:%.3f Acc:%.3f " %
            (epoch, train_loss / len(training_dataloader), train_acc.get()[1]))

    return network, train_acc.get()[1]
Example #7
def test(valid_iter, net, ctx):
    val_metric = metric.Accuracy()
    for X, y in valid_iter:
        X = X.as_in_context(ctx)
        y = y.as_in_context(ctx).astype('float32')  # the model's output is float32
        outputs = net(X)
        val_metric.update(y, outputs)
    return val_metric.get()
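Note that, unlike most examples here that return `get()[1]`, this `test` returns the full `(name, value)` pair, so the caller unpacks it:

name, acc = test(valid_iter, net, ctx)
print('%s: %.4f' % (name, acc))  # e.g. accuracy: 0.9213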
Example #8
    def evaluate_accuracy(self, data_iterator, net):
        '''Given model and data, the model accuracy will be calculated.'''
        acc = metric.Accuracy()
        for i, (data, label) in enumerate(data_iterator):
            data = data.as_in_context(self.ctx).astype(self.precision)
            label = label.as_in_context(self.ctx).astype(self.precision)
            output = net(data)
            predictions = nd.argmax(output, axis=1)
            acc.update(preds=predictions, labels=label)
        return acc.get()[1]
Example #9
def evaluate_mxnet(model, test_data, loss_fn, device):
    acc = metric.Accuracy()
    total_loss = 0.0
    for X, Y in test_data:
        X, Y = X.copyto(device), Y.copyto(device)
        pred = model(X)
        loss = loss_fn(pred, Y)

        total_loss += loss.mean().asscalar()
        acc.update(preds=pred, labels=Y)
    return acc.get()[1], total_loss / len(test_data)
Example #10
def train_model_mxnet(model,
                      train_data,
                      test_data,
                      device,
                      epochs=40,
                      vis_mod=10):
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    optimizer = gluon.Trainer(model.collect_params(), 'sgd', {
        'learning_rate': 1e-2,
        'momentum': 0.5,
        'clip_gradient': 5.0
    })

    train_acc = []
    train_loss = []
    test_acc = []
    test_loss = []

    start = time.time()
    for epoch in range(epochs):
        acc = metric.Accuracy()
        total_loss = 0.0

        for X, Y in train_data:
            X, Y = X.copyto(device), Y.copyto(device)
            with autograd.record():
                pred = model(X)
                loss = loss_fn(pred, Y)
            loss.backward()
            optimizer.step(batch_size=X.shape[0])
            total_loss += loss.mean().asscalar()

            acc.update(preds=pred, labels=Y)

        train_loss.append(total_loss / len(train_data))
        train_acc.append(acc.get()[1])
        _test_acc, _test_loss = evaluate_mxnet(model, test_data, loss_fn,
                                               device)
        test_acc.append(_test_acc)
        test_loss.append(_test_loss)
        if (epoch + 1) % vis_mod == 0:
            print(f'[Epoch {epoch + 1:3d}] train_acc: {100 * acc.get()[1]:4.2f}% - '\
                  f'train_loss: {total_loss / len(train_data):6.3f}')
            print(f'[Epoch {epoch + 1:3d}] val_acc: {100 * _test_acc:5.2f}% - '\
                  f'val_loss: {_test_loss:6.3f}')

            print(f'Model runtime: {time.time() - start:6.3f}s')

    return {
        'accuracy': train_acc,
        'loss': train_loss,
        'test_accuracy': test_acc,
        'test_loss': test_loss
    }
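Since `train_model_mxnet` returns the four metric curves in a dict, a short matplotlib sketch (an assumption here, not part of the source) can visualize them:

import matplotlib.pyplot as plt

history = train_model_mxnet(model, train_data, test_data, device)
epochs = range(1, len(history['accuracy']) + 1)
plt.plot(epochs, history['accuracy'], label='train accuracy')
plt.plot(epochs, history['test_accuracy'], label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()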
Example #11
def train(train_iter):

    net = nn.HybridSequential()
    with net.name_scope():
        net.add(
            model.DSOD(32, 6, 32, 1, 1)  # 64 6 48 1 1
        )
    net.initialize()

    box_loss = SmoothL1Loss()
    cls_loss = FocalLoss()  # FocalLoss chosen instead of hard negative mining
    l1_loss = gluon.loss.L1Loss()
    net.collect_params().reset_ctx(ctx)
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': 0.1, 'wd': 5e-4})

    cls_metric = metric.Accuracy()
    box_metric = metric.MAE()

    filename = args.params
    if args.retrain:
        print('loading weights from the previous run')
        net.load_params(filename, ctx=mx.gpu())

    for epoch in range(args.epoch):
        train_data.reset()
        cls_metric.reset()
        box_metric.reset()
        tic = time.time()

        for i, batch in enumerate(train_data):
            x = batch.data[0].as_in_context(ctx)
            y = batch.label[0].as_in_context(ctx)

            with mx.autograd.record():
                anchors, class_preds, box_preds = net(x)
                box_target, box_mask, cls_target = training_targets(anchors, class_preds, y)

                loss1 = cls_loss(class_preds, cls_target)

                loss2 = l1_loss(box_preds, box_target, box_mask)

                loss = loss1 + 5 * loss2
            loss.backward()
            trainer.step(batch_size)

            cls_metric.update([cls_target], [class_preds.transpose((0, 2, 1))])
            box_metric.update([box_target], [box_preds * box_mask])

        print('Epoch %2d, train %s %.2f, %s %.5f, time %.1f sec' % (
            epoch, *cls_metric.get(), *box_metric.get(), time.time() - tic))

        net.save_params(filename)
Example #12
def eval_model(features, labels):
    l_sum = 0
    l_n = 0
    accuracy = metric.Accuracy()
    for i in range(features.shape[0] // batch_size):
        X = features[i * batch_size:(i + 1) * batch_size].as_in_context(ctx).T
        y = labels[i * batch_size:(i + 1) * batch_size].as_in_context(ctx).T
        output = net(X)
        l = loss(output, y)
        l_sum += l.sum().asscalar()
        l_n += l.size
        accuracy.update(preds=nd.argmax(output, axis=1), labels=y)
    return l_sum / l_n, accuracy.get()[1]
Example #13
def train(network, training_dataloader, batch_size, epochs):
    """
    Should take an initialized network and train that network using data from the data loader.
    
    :param network: initialized gluon network to be trained
    :type network: gluon.Block
    
    :param training_dataloader: the training DataLoader provides batches of data for every iteration
    :type training_dataloader: gluon.data.DataLoader
    
    :param batch_size: batch size for the DataLoader.
    :type batch_size: int
    
    :param epochs: number of epochs to train for
    :type epochs: int
    
    :return: tuple of trained network and the final training accuracy
    :rtype: (gluon.Block, float)
    """

    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    train_accuracy = metric.Accuracy()
    trainer = gluon.Trainer(network.collect_params(), 'adam',
                            {'learning_rate': 0.002})

    for epoch in range(epochs):
        train_loss = 0.
        tic = time()
        for data, label in training_dataloader:
            with autograd.record():
                output = network(data)
                loss = loss_fn(output, label)
            loss.backward()

            trainer.step(batch_size)

            train_loss += loss.mean().asscalar()
            train_accuracy.update(label, output)

        print("Epoch(%d) Loss:%.3f Acc:%.3f Perf: %.1f img/sec" %
              (epoch, train_loss / len(training_dataloader),
               train_accuracy.get()[1], len(training_dataloader) /
               (time() - tic)))

    network.save_parameters("trained_net.params")
    training_accuracy = train_accuracy.get()[1]

    return network, training_accuracy
Example #14
def train(network, training_dataloader, batch_size, epochs):
    """
    Should take an initialized network and train that network using data from the data loader.
    
    :param network: initialized gluon network to be trained
    :type network: gluon.Block
    
    :param training_dataloader: the training DataLoader provides batches of data for every iteration
    :type training_dataloader: gluon.data.DataLoader
    
    :param batch_size: batch size for the DataLoader.
    :type batch_size: int
    
    :param epochs: number of epochs to train for
    :type epochs: int
    
    :return: tuple of trained network and the final training accuracy
    :rtype: (gluon.Block, float)
    """
    
    # Given the network, dataloader, batch size and epoch count, create the
    # loss function, metric accumulator and Adam trainer, then run the loop.
    
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    train_acc = metric.Accuracy()
    trainer = gluon.Trainer(network.collect_params(), 'adam', { 'learning_rate' : 0.002 })
    
    for epoch in range(epochs):
        for data, label in training_dataloader:
            with autograd.record():
                output = network(data)
                loss = loss_fn(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_acc.update(label, output)
        training_accuracy = train_acc.get()[1]
        print(training_accuracy)
    return network, training_accuracy
Example #15
def train(ctx,
          loss,
          trainer,
          datasetName,
          modelName,
          net,
          train_iter,
          valid_iter,
          num_epochs,
          n_retrain_epoch=0):
    '''
    n_retrain_epoch: the epoch index to resume training from
    '''
    train_metric = metric.Accuracy()
    train_history = TrainingHistory(['training-error', 'validation-error'])
    best_val_score = 0
    modelDir, resultDir = get_result_dirs(datasetName)
    for epoch in range(num_epochs):
        train_l_batch, start = 0.0, time.time()  # start the timer
        train_metric.reset()
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.as_in_context(ctx).astype('float32')  # the model's output is float32
            with autograd.record():  # record gradient information
                outputs = net(X)  # model output
                l = loss(outputs, y).mean()  # compute the mean loss
            l.backward()  # back-propagation
            trainer.step(1)
            train_l_batch += l.asscalar()  # accumulate the batch loss
            train_metric.update(y, outputs)  # update the training accuracy
        _, train_acc = train_metric.get()
        time_s = "time {:.2f} sec".format(time.time() - start)  # stop the timer
        valid_loss = evaluate_loss(valid_iter, net, ctx, loss)  # mean loss on the validation set
        _, val_acc = test(valid_iter, net, ctx)  # accuracy on the validation set
        epoch_s = (
            "epoch {:d}, train loss {:.5f}, valid loss {:.5f}, train acc {:.5f}, valid acc {:.5f}, "
            .format(n_retrain_epoch + epoch, train_l_batch, valid_loss,
                    train_acc, val_acc))
        print(epoch_s + time_s)
        train_history.update([1 - train_acc, 1 - val_acc])  # update the error curves
        train_history.plot(
            save_path=f'{resultDir}/{modelName}_history.png')  # refresh the plot
        if val_acc > best_val_score:  # keep the best model so far
            best_val_score = val_acc
            net.save_parameters('{}/{:.4f}-{}-{:d}-best.params'.format(
                modelDir, best_val_score, modelName, n_retrain_epoch + epoch))
    return train_history
Example #16
    def fit(self, train_gen, test_gen, epochs, print_every, loss_with_softmax,
            optimizer):

        trainer = gluon.Trainer(params=self.collect_params(),
                                optimizer=optimizer)
        # Initialize some objects for the metrics
        acc = metric.Accuracy()
        train_acc_records = []
        test_acc_records = []
        loss_records = []

        for e in range(epochs):
            for i, (data, label) in enumerate(train_gen):

                data = data.as_in_context(self.ctx).astype(self.precision)
                label = label.as_in_context(self.ctx).astype(np.float32)

                with autograd.record():
                    label_linear = self.layer(data)
                    label_linear = label_linear.astype(
                        np.float32
                    )  # Improves accuracy, as suggested in NVIDIA's SDK.
                    loss = loss_with_softmax(label_linear, label)
                loss.backward()
                trainer.step(batch_size=128)

                # Print metrics for the current training batch and the test
                # data every `print_every` iterations.
                if i % print_every == 0:
                    label_pred = nd.argmax(nd.softmax(label_linear), axis=1)
                    acc.reset()
                    acc.update(preds=label_pred, labels=label)
                    train_acc = acc.get()[1]

                    test_acc = self.evaluate_accuracy(test_gen, self.layer)

                    train_acc_records.append(train_acc)
                    test_acc_records.append(test_acc)

                    curr_loss = nd.mean(loss).asscalar()
                    loss_records.append(curr_loss)
                    print(
                        "epoch=%2s, iter=%5d, loss=%10f, train acc=%10f, test_acc=%10f"
                        % (e, i, curr_loss, train_acc, test_acc))

        # Visualize the accuracy metrics calculated during training.
        self.viz_training(train_acc_records, test_acc_records, loss_records)
Example #17
def calculate_accuracy(network, dataloader):
    """
    Calculates accuracy of the network on the data given by the dataloader.
    
    :param network: network to be tested
    :type network: mx.gluon.Block
    :param dataloader: dataloader for test data
    :type dataloader: mx.gluon.data.DataLoader
    
    :return: updated metric
    :rtype: mx.metric.EvalMetric
    """
    accuracy = metric.Accuracy()
    for data, labels in tqdm(dataloader):
        preds = network(data)
        accuracy.update(labels=labels, preds=preds)
    return accuracy
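Unlike the other snippets, `calculate_accuracy` returns the `EvalMetric` object itself rather than a float, so a caller extracts the value via `get()`; `net` and `test_loader` below are illustrative names:

accuracy = calculate_accuracy(net, test_loader)
name, value = accuracy.get()
print('%s: %.4f' % (name, value))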
Example #18
def train_cls_network(inference, train_loader, trainer, cur_epoch, ctx, criterion, log_iter=100):
    metric_acc = metric.Accuracy()
    metric_loss = metric.Loss()

    train_loader.reset()

    epoch_start_time = timeit.default_timer()

    for cur_batch, batch in enumerate(train_loader):
        batch_start_time = timeit.default_timer()

        batch_size = batch.data[0].shape[0]

        data = gluon.utils.split_and_load(batch.data[0], ctx)
        label = gluon.utils.split_and_load(batch.label[0], ctx)

        with autograd.record(train_mode=True):
            losses = []
            for x, y in zip(data, label):
                y_hat = inference(x)
                loss = criterion(y_hat, y)
                losses.append(loss)

                metric_loss.update(None, preds=[loss])
                metric_acc.update(preds=[y_hat], labels=[y])

        for loss in losses:
            loss.backward()

        trainer.step(batch_size)

        if cur_batch % log_iter == 0 and cur_batch > 0:
            batch_elapsed_time = timeit.default_timer() - batch_start_time
            print('Epoch [%d-%d]: Speed: %.2f samples/s \t Accuracy: %.2f \t Loss: %.4f' %
                  (cur_epoch, cur_batch, batch_size / batch_elapsed_time, 100 * metric_acc.get()[1],
                   metric_loss.get()[1]))

    epoch_elapsed_time = timeit.default_timer() - epoch_start_time

    logging.info('Epoch [%d]: Accuracy: %.2f' % (cur_epoch, 100 * metric_acc.get()[1]))
    logging.info('Epoch [%d]: Loss: %.2f' % (cur_epoch, metric_loss.get()[1]))
    logging.info('Epoch [%d]: Elapsed time: %s' % (cur_epoch, str(timedelta(seconds=epoch_elapsed_time))))

    return metric_acc.get()[1]
Example #19
def train(net, loss_fn, train_data, epochs, batch_size):
    """
    Should take an initialized network and train that network using data from the data loader.
    
    :param net: initialized gluon network to be trained
    :type net: gluon.Block
    
    :param loss_fn: the loss function
    :type loss_fn: gluon.Block
    
    :param train_data: the training DataLoader provides batches of data for every iteration
    :type train_data: gluon.data.DataLoader
    
    :param epochs: number of epochs to train for
    :type epochs: int
    
    :param batch_size: batch size for the DataLoader.
    :type batch_size: int
    
    :return: tuple of trained network and the final training accuracy
    :rtype: (gluon.Block, float)
    """

    train_acc = metric.Accuracy()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.1})

    for epoch in range(epochs):
        print(f'epoch: {epoch}')
        for data, label in train_data:
            with autograd.record():
                output = net(data)
                loss = loss_fn(output, label)
            loss.backward()

            trainer.step(batch_size)

            train_acc.update(label, output)
            print(train_acc.get()[1])

    return (net, train_acc.get()[1])
Example #20
def train_fun():

    cls_metric = metric.Accuracy()
    box_metric = metric.MAE()
    ctx = gpu(0)
    train_data, test_data, class_names, num_class = get_iterators(
        data_shape, batch_size)

    train_data.reshape(label_shape=(3, 5))
    train_data = test_data.sync_label_shape(train_data)
    net = ToySSD(num_class)
    net.initialize(init.Xavier(magnitude=2), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': 0.1,
        'wd': 5e-4
    })

    import time
    from mxnet import autograd
    cls_loss = FocalLoss()
    box_loss = SmoothL1Loss()
    for epoch in range(30):
        train_data.reset()
        cls_metric.reset()
        box_metric.reset()
        tic = time.time()
        for i, batch in enumerate(train_data):
            x = batch.data[0].as_in_context(ctx)
            y = batch.label[0].as_in_context(ctx)
            with autograd.record():
                anchors, class_preds, box_preds = net(x)
                box_target, box_mask, cls_target = training_targets(
                    anchors, class_preds, y)
                loss1 = cls_loss(class_preds, cls_target)
                loss2 = box_loss(box_preds, box_target, box_mask)
                loss = loss1 + loss2
            loss.backward()
            trainer.step(batch_size)
            cls_metric.update([cls_target], [class_preds.transpose((0, 2, 1))])
            box_metric.update([box_target], [box_preds * box_mask])
        print('epoch %2d, train %s %.2f, %s %.5f, time %.1f sec' %
              (epoch, *cls_metric.get(), *box_metric.get(), time.time() - tic))
Example #21
def evaluate(data_loader, data_len, model, loss, ctx):
    """
    Evaluation, return accuracy and loss
    """
    total_loss = 0.0
    acc = metric.Accuracy()

    for data, label in data_loader:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)

        # plain forward pass; gradients are not needed during evaluation
        output = model(data)
        losses = loss(output, label)

        total_loss += nd.sum(losses).asscalar()
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1], total_loss / data_len
Example #22
def validate(net, val_data, ctx):
    metric = mtc.Accuracy()
    cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    val_loss = 0

    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0],
                                          ctx_list=ctx,
                                          batch_axis=0,
                                          even_split=False)
        labels = gluon.utils.split_and_load(batch[1],
                                            ctx_list=ctx,
                                            batch_axis=0,
                                            even_split=False)
        outputs = [net(X) for X in data]
        loss = [cross_entropy(yhat, y) for yhat, y in zip(outputs, labels)]
        metric.update(labels, outputs)
        val_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)

    _, val_acc = metric.get()
    return val_acc, val_loss / len(val_data)
Example #23
def evaluate(network, dataloader):
    """
    Should compute the accuracy of the network on the validation set.
    
    :param network: initialized gluon network to be trained
    :type network: gluon.Block
    
    :param dataloader: the validation DataLoader provides batches for data for every iteration
    :type dataloader: gluon.data.DataLoader
    
    :return: validation accuracy
    :rtype: float
    """

    valid_acc = metric.Accuracy()
    for data, label in dataloader:
        output = network(data)
        valid_acc.update(label, output)
    #print("Validation acc: %.3f "%(valid_acc.get()[1]))
    val_acc = valid_acc.get()[1]
    return val_acc
Example #24
def validate(network, validation_dataloader):
    """
    Should compute the accuracy of the network on the validation set.
    
    :param network: initialized gluon network to be trained
    :type network: gluon.Block
    
    :param validation_dataloader: the validation DataLoader provides batches of data for every iteration
    :type validation_dataloader: gluon.data.DataLoader
    
    :return: validation accuracy
    :rtype: float
    """
    valid_acc = metric.Accuracy()
    for data, label in validation_dataloader:
        output = network(data)
        valid_acc.update(label, output)

    print("Validation Acc: %.3f " % (valid_acc.get()[1]))

    return valid_acc.get()[1]
Example #25
def eval_model(features, labels, net, batch_size):
    l_sum = 0
    l_n = 0
    accuracy = metric.Accuracy()
    batch_count = features.shape[0] // batch_size
    preds_all = None
    labels_all = None

    for i in range(batch_count):
        X = features[i * batch_size:(i + 1) * batch_size].as_in_context(
            features.context).T  # batch_size * embed_size
        y = labels[i * batch_size:(i + 1) * batch_size].as_in_context(
            labels.context).T  # batch_size * 1
        output = net(X)
        l = loss(output, y)
        l_sum += l.sum().asscalar()
        l_n += l.size

        preds = nd.argmax(output, axis=1)
        accuracy.update(preds=preds, labels=y)

        if preds_all is None:
            preds_all = preds
        preds_all = nd.concat(preds_all, preds, dim=0)
        if labels_all is None:
            labels_all = y
        labels_all = nd.concat(labels_all, y, dim=0)

    # tp = nd.sum((preds_all == 1) * (labels_all == 1)).asscalar()
    # fp = nd.sum((preds_all == 1) * (labels_all == 0)).asscalar()
    # fn = nd.sum((preds_all == 0) * (labels_all == 1)).asscalar()
    # precision = float(tp) / (tp + fp)
    # recall = float(tp) / (tp + fn)
    # f1 = 2 * (precision * recall) / (precision + recall)

    return l_sum / l_n, accuracy.get()[1], evaluate(preds_all, labels_all)
Example #26
    box_loss = SmoothL1Loss()
    print(box_loss)

    train_data.reshape(label_shape=(3, 5))
    train_data = test_data.sync_label_shape(train_data)

    ctx = utils.try_gpu()
    net = ToySSD(num_class)
    net.initialize(init.Xavier(magnitude=2), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': 0.1,
        'wd': 5e-4
    })

    cls_metric = metric.Accuracy()
    box_metric = metric.MAE()
    for epoch in range(30):
        # reset data iterators and metrics
        train_data.reset()
        cls_metric.reset()
        box_metric.reset()
        tic = time.time()
        for i, batch in enumerate(train_data):
            x = batch.data[0].as_in_context(ctx)
            y = batch.label[0].as_in_context(ctx)
            with autograd.record():
                anchors, class_preds, box_preds = net(x)
                box_target, box_mask, cls_target = training_targets(
                    anchors, class_preds, y)
                # losses
Example #27
def update_network(queue, nn_update_idx, symbol_filename, params_filename,
                   convert_to_onnx, main_config, train_config: TrainConfig,
                   model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
    after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    Updates the neural network with the newly acquired games from the replay memory
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: Dict of the train_config (imported from train_config.py)
    :param model_contender_dir: String of the contender directory path
    :return: k_steps_final
    """

    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(
        train_config.device_id) if train_config.context == "gpu" else mx.cpu()
    # determine how many dataset parts are available for training
    train_config.nb_parts = len(
        glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number parts for training: %d" % train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception(
            'No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
            ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(
        dataset_type="val",
        part_id=0,
        normalize=train_config.normalize,
        verbose=False,
        q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy,
                                  train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label,
                                  train_config.is_policy_from_plane_data)
    val_dataset = gluon.data.ArrayDataset(nd.array(x_val),
                                          nd.array(y_val_value),
                                          nd.array(y_val_policy))
    val_data = gluon.data.DataLoader(val_dataset,
                                     train_config.batch_size,
                                     shuffle=False,
                                     num_workers=train_config.cpu_count)

    symbol = mx.sym.load(symbol_filename)

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) *
                       train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch *
                                train_config.nb_training_epochs)

    train_objects.lr_schedule = CosineAnnealingSchedule(
        train_config.min_lr, train_config.max_lr,
        max(train_config.total_it * .7, 1))
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule,
                                             start_lr=train_config.min_lr,
                                             length=max(
                                                 train_config.total_it * .25,
                                                 1))
    train_objects.momentum_schedule = MomentumSchedule(
        train_objects.lr_schedule, train_config.min_lr, train_config.max_lr,
        train_config.min_momentum, train_config.max_momentum)

    input_shape = x_val[0].shape
    inputs = mx.sym.var('data', dtype='float32')
    value_out = symbol.get_internals()[main_config['value_output'] + '_output']
    policy_out = symbol.get_internals()[main_config['policy_output'] +
                                        '_output']
    sym = mx.symbol.Group([value_out, policy_out])
    net = mx.gluon.SymbolBlock(sym, inputs)
    net.collect_params().load(params_filename, ctx)

    metrics_gluon = {
        'value_loss':
        metric.MSE(name='value_loss', output_names=['value_output']),
        'value_acc_sign':
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
    }

    if train_config.sparse_policy_label:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics_gluon['policy_loss'] = metric.CrossEntropy(
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.Accuracy(
            axis=1,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])
    else:
        metrics_gluon['policy_loss'] = metric.create(
            cross_entropy,
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.create(
            acc_distribution,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])

    train_objects.metrics = metrics_gluon

    train_config.export_weights = False  # don't save intermediate weights
    train_agent = TrainerAgent(net,
                               val_data,
                               train_config,
                               train_objects,
                               use_rtpt=False)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final,
     val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (
        model_contender_dir, val_value_loss_final, val_policy_loss_final,
        val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    # the export function saves both the architecture and the weights
    net.export(prefix, epoch=nn_update_idx)
    print()
    logging.info("Saved checkpoint to %s-%04d.params", prefix, nn_update_idx)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file,
                                    ["value_out_output", "policy_out_output"],
                                    input_shape, [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
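`update_network` reports its result through the queue rather than a return value, which suggests it is meant to run in a worker process. A hedged usage sketch; every argument below is a placeholder:

from multiprocessing import Process, Queue

queue = Queue()
worker = Process(target=update_network,
                 args=(queue, nn_update_idx, symbol_filename, params_filename,
                       False, main_config, train_config, model_contender_dir))
worker.start()
k_steps_final = queue.get()  # blocks until training finishes
worker.join()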
Example #28
    train=True).transform_first(mnist_transformer),
                                     batch_size=batch_size,
                                     shuffle=False)

mnist_net = MNistHybrid(no_class=no_class)

# with verbose=True, parameter initialization is logged in much finer detail
mnist_net.collect_params().initialize(init=mx.init.Xavier(),
                                      force_reinit=True,
                                      verbose=False,
                                      ctx=ctx)
trainer = mx.gluon.Trainer(params=mnist_net.collect_params(),
                           optimizer="sgd",
                           optimizer_params={"learning_rate": lr})
loss_fun = mx.gluon.loss.SoftmaxCrossEntropyLoss()
train_accuracy = metric.Accuracy()
test_accuracy = metric.Accuracy()
ninv_train = 1 / len(train_data)
ninv_test = 1 / len(test_data)

# reshaped as batch, no_channel, w, h
sample_1 = mx.gluon.data.vision.MNIST(train=True)[0][0].reshape((1, 1, 28, 28))

plt.imshow(sample_1.asnumpy().reshape((28, 28)), cmap='gray')
plt.show()

if do_train:
    for a_epoch in range(epochs):
        train_loss, f_train_acc, f_val_acc = .0, .0, .0
        tic = time()
        for a_batch in train_data:
Example #29
def run_training(alpha, queue):
    _, x_val, yv_val, yp_val, plys_to_end, _ = load_pgn_dataset(
        dataset_type='val', part_id=0, verbose=True, normalize=tc.normalize)
    if tc.discount != 1:
        yv_val *= tc.discount**plys_to_end

    if tc.select_policy_from_plane:
        val_iter = mx.io.NDArrayIter(
            {'data': x_val}, {
                'value_label': yv_val,
                'policy_label': np.array(FLAT_PLANE_IDX)[yp_val.argmax(axis=1)]
            }, tc.batch_size)
    else:
        val_iter = mx.io.NDArrayIter({'data': x_val}, {
            'value_label': yv_val,
            'policy_label': yp_val.argmax(axis=1)
        }, tc.batch_size)

    tc.nb_parts = len(glob.glob(main_config['planes_train_dir'] + '**/*'))

    nb_it_per_epoch = (
        len(x_val) * tc.nb_parts
    ) // tc.batch_size  # calculate how many iterations per epoch exist
    # one iteration is defined by passing 1 batch and doing backprop
    tc.total_it = int(nb_it_per_epoch * tc.nb_training_epochs)

    ### Define a Learning Rate schedule
    to.lr_schedule = OneCycleSchedule(start_lr=tc.max_lr / 8,
                                      max_lr=tc.max_lr,
                                      cycle_length=tc.total_it * .3,
                                      cooldown_length=tc.total_it * .6,
                                      finish_lr=tc.min_lr)
    to.lr_schedule = LinearWarmUp(to.lr_schedule,
                                  start_lr=tc.min_lr,
                                  length=tc.total_it / 30)

    ### Momentum schedule
    to.momentum_schedule = MomentumSchedule(to.lr_schedule, tc.min_lr,
                                            tc.max_lr, tc.min_momentum,
                                            tc.max_momentum)
    plot_schedule(to.momentum_schedule,
                  iterations=tc.total_it,
                  ylabel='Momentum')

    input_shape = x_val[0].shape

    beta = np.sqrt(2 / alpha)

    print("alpha:", alpha)
    print("beta:", beta)

    depth = int(round(base_depth * alpha))
    channels = int(round(base_channels * beta))

    kernels = [3] * depth
    se_types = [None] * len(kernels)
    channels_reduced = int(round(channels / 4))

    symbol = rise_mobile_v3_symbol(channels=channels,
                                   channels_operating_init=channels_reduced,
                                   act_type='relu',
                                   channels_value_head=8,
                                   value_fc_size=256,
                                   channels_policy_head=NB_POLICY_MAP_CHANNELS,
                                   grad_scale_value=tc.val_loss_factor,
                                   grad_scale_policy=tc.policy_loss_factor,
                                   dropout_rate=tc.dropout_rate,
                                   select_policy_from_plane=True,
                                   kernels=kernels,
                                   se_types=se_types)

    # create a trainable module on compute context
    model = mx.mod.Module(symbol=symbol,
                          context=ctx,
                          label_names=['value_label', 'policy_label'])
    model.bind(for_training=True,
               data_shapes=[('data', (tc.batch_size, input_shape[0],
                                      input_shape[1], input_shape[2]))],
               label_shapes=val_iter.provide_label)
    model.init_params(
        mx.initializer.Xavier(rnd_type='uniform',
                              factor_type='avg',
                              magnitude=2.24))

    metrics_mxnet = [
        metric.MSE(name='value_loss',
                   output_names=['value_output'],
                   label_names=['value_label']),
        metric.CrossEntropy(name='policy_loss',
                            output_names=['policy_output'],
                            label_names=['policy_label']),
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
        metric.Accuracy(axis=1,
                        name='policy_acc',
                        output_names=['policy_output'],
                        label_names=['policy_label'])
    ]

    to.metrics = metrics_mxnet
    train_agent = TrainerAgentMXNET(model,
                                    symbol,
                                    val_iter,
                                    tc,
                                    to,
                                    use_rtpt=True)
    print("model.score(val_iter, to.metrics:",
          model.score(val_iter, to.metrics))

    # Iteration counter used for the momentum and learning rate schedule;
    # assumed to mirror Example #27 (tc.k_steps_initial * tc.batch_steps)
    cur_it = tc.k_steps_initial * tc.batch_steps

    # Start the training process
    _, (k_steps_best, val_metric_values_best) = train_agent.train(cur_it)

    new_row = {
        'alpha': alpha,
        'beta': beta,
        'depth': depth,
        'channels': channels,
        'k_steps_best': k_steps_best,
        'val_loss': val_metric_values_best['loss'],
        'val_value_loss': val_metric_values_best['value_loss'],
        'val_policy_loss': val_metric_values_best['policy_loss'],
        'val_policy_acc': val_metric_values_best['policy_acc'],
        'val_value_acc': val_metric_values_best['value_acc_sign']
    }

    queue.put(new_row)
    print(new_row)
Example #30
    def __init__(self, args):
        super(Trainer, self).__init__()
        self.args = args
        self.experiment_dir = args.experiment_dir
        if not osp.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)
            print("The experiment dir has been created:{}".format(
                self.experiment_dir))
        self.trainer_log = TrainerLog(args=args, append=True)
        self.ctx = set_ctx(args=args)
        self.check_point = CheckPoint(args=args,
                                      trainer_log=self.trainer_log,
                                      ctx=self.ctx)
        self.train_loader, self.test_loader = dataloader(args=args)
        self.lr_scheduler = None
        self.optimizer = None
        self.model = None
        if self.train_loader is not None:
            self.train_samples_num = self.train_loader._dataset.__len__()
            print("train dataset samples: {}".format(self.train_samples_num))
        self.test_samples_num = self.test_loader._dataset.__len__()
        print("test dataset samples: {}".format(self.test_samples_num))
        self.resume_epoch = 0
        if args.only_test is False:
            if args.use_tensorboard is True:
                from tensorboardX import SummaryWriter
                self.tb_writer = SummaryWriter(
                    log_dir=osp.join(args.experiment_dir, 'tensorboard'))
            else:
                self.tb_writer = None
            if args.resume is True:
                self.checkpoint_epoch = args.checkpoint_epoch
                self.model = get_networks(args=args, ctx=self.ctx)
                self.resume_epoch = self.check_point.load_checkpoint_parameters(
                    epoch=self.checkpoint_epoch, model=self.model)
            else:
                self.model = get_networks(args=args, ctx=self.ctx)
                self.model.classifier.initialize(ctx=self.ctx)

            self.lr_scheduler = get_lr_scheduler(
                args=args, train_loader=self.train_loader)
            self.optimizer, self.trainer = set_optimizer(
                model=self.model, lr_scheduler=self.lr_scheduler, args=args)
            self.loss_functions = set_loss(args=args, tb_writer=self.tb_writer)
            self.current_epoch = None
        elif args.only_test is True:
            self.checkpoint_epoch = args.checkpoint_epoch
            self.model = get_networks(args=args, ctx=self.ctx)
            self.epoch_test = args.epoch_test
            _ = self.check_point.load_checkpoint_parameters(
                epoch=self.checkpoint_epoch,
                model=self.model,
                epoch_test=self.epoch_test)
        if self.lr_scheduler is not None:
            self.trainer_log.print_use_lr_scheduler()
        if self.optimizer is not None and self.trainer is not None:
            self.trainer_log.print_use_optimizer()
        if self.model is not None:
            self.trainer_log.print_use_network()
        self.test_accuracy_metric = metric.Accuracy()
        self.epochs = args.epochs
        self.train_total = 0
        self.best_accuracy = None
        self.current_accuracy = None