Example #1
def Data_Processing(batch_size):
    '''In this GAN tutorial, we don't need the label data.'''
    (train_lbl_one_hot, train_lbl,
     train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                         'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl,
     test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                        't10k-images-idx3-ubyte.gz')
    '''Data loading via the MXNet Data Loading API.'''
    train_iter = mx.io.NDArrayIter(data={'data': to2d(train_img)},
                                   batch_size=batch_size,
                                   shuffle=True)  # training data
    return train_iter, len(train_img)
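

# --- Usage sketch (hypothetical, not part of the original example) ---
# A minimal way to consume the iterator returned by Data_Processing; each batch carries only
# image data, since this GAN tutorial needs no labels. The batch size of 128 and the assumption
# that to2d flattens the 28x28 images to vectors of length 784 are illustrative.
if __name__ == '__main__':
    train_iter, train_data_number = Data_Processing(batch_size=128)
    for batch in train_iter:
        real_images = batch.data[0]   # one batch of flattened images, roughly shape (128, 784)
        break                         # demonstration only: look at a single batch
    train_iter.reset()                # rewind the iterator before real training
    print(train_data_number)          # total number of training images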
Example #2
def NeuralNet(epoch, batch_size, save_period):

    time_step = 28
    hidden_unit_number1 = 100
    hidden_unit_number2 = 100
    fc_number = 100
    class_number = 10
    use_cudnn = True
    '''
    load_data

    1. For SoftmaxOutput, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data
                                                                or
    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data

    2. For LogisticRegressionOutput, LinearRegressionOutput, MakeLoss, and so on, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data

    '''
    (train_lbl_one_hot, train_lbl,
     train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                         'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl,
     test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                        't10k-images-idx3-ubyte.gz')
    '''Data loading via the MXNet Data Loading API.'''
    train_iter = mx.io.NDArrayIter(data={'data': train_img},
                                   label={'label': train_lbl_one_hot},
                                   batch_size=batch_size,
                                   shuffle=True)  #training data
    test_iter = mx.io.NDArrayIter(data={'data': test_img},
                                  label={'label':
                                         test_lbl_one_hot})  #test data

    ####################################################-Network-################################################################
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    data = mx.sym.transpose(data, axes=(1, 0, 2))  # (time,batch,column)
    '''1. RNN cell declaration'''
    '''
    Fusing RNN layers across time step into one kernel.
    Improves speed but is less flexible. Currently only
    supported if using cuDNN on GPU.
    '''

    if use_cudnn:  #faster!!!
        rnn1 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number1,
                                   mode="rnn_tanh",
                                   prefix="rnn1_",
                                   get_next_state=True)
        rnn2 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number2,
                                   mode="rnn_tanh",
                                   prefix="rnn2_",
                                   get_next_state=True)
    else:
        rnn1 = mx.rnn.RNNCell(num_hidden=hidden_unit_number1,
                              activation='tanh',
                              prefix='rnn1_')
        rnn2 = mx.rnn.RNNCell(num_hidden=hidden_unit_number2,
                              activation='tanh',
                              prefix='rnn2_')
    '''2. Unroll the RNN cell along the time axis.'''
    ''' unroll's return values
    outputs : list of Symbol
              the output symbols
    states : Symbol or nested list of Symbol
             has the same structure as begin_state()

    '''
    # see the unroll function itself for details on these return values
    layer1, state1 = rnn1.unroll(length=time_step,
                                 inputs=data,
                                 merge_outputs=True,
                                 layout='TNC')
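    # layer1 is a single merged symbol (merge_outputs=True) with layout 'TNC',
    # i.e. its shape is (time_step, batch_size, hidden_unit_number1).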
    layer1 = mx.sym.Dropout(layer1, p=0.3)
    layer2, state2 = rnn2.unroll(length=time_step,
                                 inputs=layer1,
                                 merge_outputs=True,
                                 layout="TNC")
    rnn_output = mx.sym.Reshape(state2[-1], shape=(-1, hidden_unit_number2))  # final hidden state of rnn2
    '''FullyConnected Layer'''
    affine1 = mx.sym.FullyConnected(data=rnn_output,
                                    num_hidden=fc_number,
                                    name='affine1')
    act1 = mx.sym.Activation(data=affine1, act_type='sigmoid', name='sigmoid1')
    affine2 = mx.sym.FullyConnected(data=act1,
                                    num_hidden=class_number,
                                    name='affine2')
    output = mx.sym.SoftmaxOutput(data=affine2, label=label, name='softmax')

    # Visualize the network structure with output sizes (batch_size is ignored).
    shape = {"data": (time_step, batch_size, 28)}
    mx.viz.plot_network(
        symbol=output,
        shape=shape)  # The diagram can be viewed in the Jupyter notebook.
    print output.list_arguments()

    # training mod
    mod = mx.module.Module(symbol=output,
                           data_names=['data'],
                           label_names=['label'],
                           context=mx.gpu(0))
    # test mod
    test = mx.module.Module(symbol=output,
                            data_names=['data'],
                            label_names=['label'],
                            context=mx.gpu(0))

    # Network information print
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label
    '''mod.fit below binds the module itself, so this call is not strictly required;
    however, when loading saved weights before fitting, the module must be bound first, as done here.'''
    mod.bind(data_shapes=train_iter.provide_data,
             label_shapes=train_iter.provide_label)

    # weights save

    model_name = 'weights/Neural_Net'
    checkpoint = mx.callback.do_checkpoint(model_name, period=save_period)

    #weights load

    # To resume from saved weights, the two statements below load checkpoint 100 and copy its
    # parameters into the module. Comment them out for a fresh training run.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)

    # set_params requires mod.bind to have been called first. If arg_params and aux_params are
    # passed to mod.fit instead, neither this call nor mod.bind is needed.
    mod.set_params(arg_params, aux_params)

    mod.fit(
        train_iter,
        initializer=mx.initializer.Xavier(rnd_type='gaussian',
                                          factor_type="avg",
                                          magnitude=1),
        optimizer='adam',
        optimizer_params={'learning_rate': 0.001},
        eval_metric=mx.metric.MSE(),
        # If the loaded arg_params/aux_params are passed here instead, mod.set_params and mod.bind are not needed.
        num_epoch=epoch,
        arg_params=None,
        aux_params=None,
        epoch_end_callback=checkpoint)

    # Network information print
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
    print "Optimization complete."

    #################################TEST####################################
    '''load method1 - load the saved parameter'''
    #symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()
    '''load method3 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data,
              label_shapes=test_iter.provide_label,
              shared_module=mod,
              for_training=False)
    '''Uncomment the line below only when using 'load method1' or 'load method2'; with the shared_module approach it is not needed.'''
    #test.set_params(arg_params, aux_params)

    #batch by batch accuracy
    # To use the code below, the test set size must be divisible by batch_size.
    '''for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''
    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(
        float(sum(test_lbl == result)) / len(result) * 100.0)
Example #3
def NeuralNet(epoch, batch_size, save_period):
    '''load_data
    1. For SoftmaxOutput, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data

    2. For LogisticRegressionOutput, LinearRegressionOutput, MakeLoss, and so on, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data
    '''

    (train_lbl_one_hot, train_lbl,
     train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                         'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl,
     test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                        't10k-images-idx3-ubyte.gz')
    '''Data loading via the MXNet Data Loading API.'''
    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)},
                                   label={'label': train_lbl_one_hot},
                                   batch_size=batch_size,
                                   shuffle=True)  #training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)},
                                  label={'label':
                                         test_lbl_one_hot})  #test data
    '''neural network'''
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')

    # first convolution layer
    conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=30)
    conv1 = mx.sym.BatchNorm(data=conv1,
                             fix_gamma=False,
                             use_global_stats=True)
    relu1 = mx.sym.Activation(
        data=conv1, name='relu_c1',
        act_type="relu")  # -> size : (batch_size,30,24,24)
    pool1 = mx.sym.Pooling(data=relu1,
                           pool_type="max",
                           kernel=(2, 2),
                           stride=(2, 2))  # -> size : (batch_size,30,12,12)

    # second convolution layer
    conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=60)
    conv2 = mx.sym.BatchNorm(data=conv2,
                             fix_gamma=False,
                             use_global_stats=True)
    relu2 = mx.sym.Activation(data=conv2, name='relu_c2',
                              act_type="relu")  # -> size : (batch_size,60,8,8)
    pool2 = mx.sym.Pooling(data=relu2,
                           pool_type="max",
                           kernel=(2, 2),
                           stride=(2, 2))  # -> size : (batch_size,60,4,4)

    #flatten the data
    flatten = mx.sym.Flatten(data=pool2)

    # first fullyconnected layer
    affine1 = mx.sym.FullyConnected(data=flatten, name='fc1', num_hidden=50)
    affine1 = mx.sym.BatchNorm(data=affine1,
                               fix_gamma=False,
                               use_global_stats=True)
    hidden1 = mx.sym.Activation(data=affine1, name='relu_f1', act_type="relu")

    # second fully connected layer
    affine2 = mx.sym.FullyConnected(data=hidden1, name='fc2', num_hidden=50)
    affine2 = mx.sym.BatchNorm(data=affine2,
                               fix_gamma=False,
                               use_global_stats=True)
    hidden2 = mx.sym.Activation(data=affine2, name='relu_f2', act_type="relu")
    output_affine = mx.sym.FullyConnected(data=hidden2,
                                          name='fc3',
                                          num_hidden=10)

    output = mx.sym.SoftmaxOutput(data=output_affine, label=label)

    # Visualize the network structure with output sizes (batch_size is ignored).
    shape = {"data": (batch_size, 1, 28, 28)}
    mx.viz.plot_network(
        symbol=output,
        shape=shape)  # The diagram can be viewed in the Jupyter notebook.
    print output.list_arguments()

    # First optimization method
    # weights save

    model_name = 'weights/Neural_Net'
    checkpoint = mx.callback.do_checkpoint(model_name, period=save_period)

    # training mod
    mod = mx.mod.Module(symbol=output,
                        data_names=['data'],
                        label_names=['label'],
                        context=mx.gpu(0))
    # test mod
    test = mx.mod.Module(symbol=output,
                         data_names=['data'],
                         label_names=['label'],
                         context=mx.gpu(0))

    # Network information print
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label
    '''mod.fit below binds the module itself, so this call is not strictly required;
    however, when loading saved weights before fitting, the module must be bound first, as done here.'''
    mod.bind(data_shapes=train_iter.provide_data,
             label_shapes=train_iter.provide_label)

    #weights load

    # To resume from saved weights, the two statements below load checkpoint 100 and copy its
    # parameters into the module. Comment them out for a fresh training run.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)

    # set_params requires mod.bind to have been called first. If arg_params and aux_params are
    # passed to mod.fit instead, neither this call nor mod.bind is needed.
    mod.set_params(arg_params, aux_params)

    mod.fit(
        train_iter,
        initializer=mx.initializer.Xavier(rnd_type='gaussian',
                                          factor_type="avg",
                                          magnitude=1),
        optimizer='adam',
        optimizer_params={'learning_rate': 0.001},
        eval_metric=mx.metric.MSE(),
        # If the loaded arg_params/aux_params are passed here instead, mod.set_params and mod.bind are not needed.
        num_epoch=epoch,
        arg_params=None,
        aux_params=None,
        epoch_end_callback=checkpoint)

    # Network information print
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
    print "Optimization complete."
    #################################TEST####################################
    '''load method1 - load the saved parameter'''
    #symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()
    '''load method3 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data,
              label_shapes=test_iter.provide_label,
              shared_module=mod,
              for_training=False)
    '''Uncomment the line below only when using 'load method1' or 'load method2'; with the shared_module approach it is not needed.'''
    #test.set_params(arg_params, aux_params)

    #batch by batch accuracy
    # To use the code below, the test set size must be divisible by batch_size.
    '''for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''
    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(
        float(sum(test_lbl == result)) / len(result) * 100.0)
Example #4
def CapsNet(reconstruction,
            epoch,
            batch_size,
            save_period,
            load_period,
            ctx=mx.gpu(0),
            graphviz=False):

    (train_lbl_one_hot, train_lbl,
     train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                         'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl,
     test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                        't10k-images-idx3-ubyte.gz')
    '''
    In the paper: 'Training is performed on 28x28 MNIST images that have been shifted by up to
    2 pixels in each direction with zero padding.' In this implementation, the original data is
    not transformed in that way.
    '''
    '''Data loading via the MXNet Data Loading API.'''

    train_iter = mx.io.NDArrayIter(
        data={'data': to4d(train_img)},
        label={'label': train_lbl},
        batch_size=batch_size,
        shuffle=True,
        last_batch_handle='roll_over')  #training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)},
                                  label={'label': test_lbl},
                                  batch_size=batch_size,
                                  shuffle=False,
                                  last_batch_handle='roll_over')  #test data
    '''
    reconstruction=True
    output_list[0] -> total_loss=margin_loss+reconstruction_loss
    output_list[1] -> capsule_output
    output_list[2] -> reconstruction_output
    
    reconstruction=False
    output_list[0] -> margin_loss
    output_list[1] -> capsule_output
    '''

    output_list = capsule(reconstruction=reconstruction,
                          routing_iteration=1,
                          batch_size=batch_size)

    # (1) Get the names of the symbol's arguments
    arg_names = output_list[0].list_arguments()

    # Caution: the label's shape is needed here as well
    arg_shapes, output_shapes, aux_shapes = output_list[0].infer_shape(
        data=(batch_size, 1, 28, 28), label=(batch_size, ))

    # (2) Allocate space for every argument. These NDArrays are mutable and stay in memory,
    # so the bound executor can read and update them in place.
    arg_dict = dict(
        zip(arg_names, [
            mx.nd.random.normal(loc=0, scale=0.01, shape=shape, ctx=ctx)
            for shape in arg_shapes
        ]))
    grad_dict = dict(
        zip(arg_names[1:-1],
            [mx.nd.zeros(shape, ctx=ctx)
             for shape in arg_shapes[1:-1]]))  #Exclude input output

    aux_args = [mx.nd.zeros(shape=shape, ctx=ctx) for shape in aux_shapes]
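    # Auxiliary states (if any) hold non-trainable statistics such as BatchNorm moving
    # means/variances; they are bound to the executor but never touched by the optimizer below.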

    if epoch == 0 and graphviz == True:
        if reconstruction:
            total_loss = mx.viz.plot_network(symbol=output_list[0],
                                             shape={
                                                 "data":
                                                 (batch_size, 1, 28, 28),
                                                 "label": (batch_size, )
                                             })
            total_loss.view("total_loss")
        else:
            margin_loss = mx.viz.plot_network(symbol=output_list[0],
                                              shape={
                                                  "data":
                                                  (batch_size, 1, 28, 28),
                                                  "label": (batch_size, )
                                              })
            margin_loss.view("margin_loss")

    if reconstruction:  #reconstruction=True
        if os.path.exists(
                "weights/MNIST_Reconstruction_weights-{}.param".format(
                    load_period)):
            print("MNIST_Reconstruction_weights-{}.param exists".format(
                load_period))
            pretrained = mx.nd.load(
                "weights/MNIST_Reconstruction_weights-{}.param".format(
                    load_period))
            for name in arg_names:
                if name == "data" or name == "label":
                    continue
                else:
                    arg_dict[name] = pretrained[name]
        else:
            print("weight initialization")

    else:  #reconstruction=False
        if os.path.exists(
                "weights/MNIST_weights-{}.param".format(load_period)):
            print("MNIST_weights-{}.param exists".format(load_period))
            pretrained = mx.nd.load(
                "weights/MNIST_weights-{}.param".format(load_period))
            for name in arg_names:
                if name == "data" or name == "label":
                    continue
                else:
                    arg_dict[name] = pretrained[name]
        else:
            print("weight initialization")

    network = output_list[0].bind(ctx=ctx,
                                  args=arg_dict,
                                  args_grad=grad_dict,
                                  grad_req='write',
                                  aux_states=aux_args)

    if reconstruction:
        capsule_output = output_list[1].bind(ctx=ctx,
                                             args=arg_dict,
                                             args_grad=grad_dict,
                                             grad_req='null',
                                             aux_states=aux_args,
                                             shared_exec=network)
        reconstruction_output = output_list[2].bind(ctx=ctx,
                                                    args=arg_dict,
                                                    args_grad=grad_dict,
                                                    grad_req='null',
                                                    aux_states=aux_args,
                                                    shared_exec=network)
    else:
        capsule_output = output_list[1].bind(ctx=ctx,
                                             args=arg_dict,
                                             args_grad=grad_dict,
                                             grad_req='null',
                                             aux_states=aux_args,
                                             shared_exec=network)

    #optimizer
    state = []
    optimizer = mx.optimizer.Adam(learning_rate=0.001)

    for shape in arg_shapes[1:-1]:
        state.append(
            optimizer.create_state(0, mx.nd.zeros(shape=shape, ctx=ctx)))
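    # Adam keeps one state tuple (1st/2nd moment estimates) per trainable array; index 0 is reused
    # in optimizer.update() below because each weight is updated individually with its own state.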

    if not os.path.exists("weights"):
        os.makedirs("weights")

    # learning
    for i in tqdm(range(1, epoch + 1, 1)):
        '''
        The paper uses an exponentially decaying learning rate. In this implementation,
        the learning rate is instead reset to 0.001 * 0.99**i every 10 epochs.
        '''
        if i % 10 == 0:
            optimizer.set_learning_rate(0.001 * pow(0.99, i))

        train_iter.reset()
        for batch in train_iter:
            '''
            <very important>
            Assigning with [:] copies the batch into the already-bound NDArray in place,
            instead of rebinding the name to a new array (which would detach it from the executor).
            See the reference for more information.
            '''
            arg_dict["data"][:] = batch.data[0]
            arg_dict["label"][:] = batch.label[0]
            out = network.forward()
            network.backward(out)

            for j, name in enumerate(arg_names[1:-1]):
                optimizer.update(0, arg_dict[name], grad_dict[name], state[j])

        if reconstruction:
            print("epoch : {}, last total loss : {}".format(
                i,
                mx.nd.mean(network.outputs[0]).asscalar()))
            if i % save_period == 0:

                mx.nd.save(
                    "weights/MNIST_Reconstruction_weights-{}.param".format(i),
                    arg_dict)
        else:
            print("epoch : {}, last margin loss : {}".format(
                i,
                mx.nd.mean(network.outputs[0]).asscalar()))
            if i % save_period == 0:
                mx.nd.save("weights/MNIST_weights-{}.param".format(i),
                           arg_dict)

        test_accuracy = evaluate_accuracy(test_iter, capsule_output)
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    print("#Optimization complete\n")

    test_accuracy = evaluate_accuracy(test_iter, capsule_output)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))
    if reconstruction:
        generate_image(test_iter, reconstruction_output)
Example #5
def NeuralNet(epoch,batch_size,save_period,load_weights):

    '''
    load_data

    1. For SoftmaxOutput, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data

    2. For LogisticRegressionOutput, LinearRegressionOutput, MakeLoss, and so on, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data

    '''

    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz','train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''Data loading via the MXNet Data Loading API.'''
    train_iter = mx.io.NDArrayIter(data={'data' : to2d(train_img)},label={'one_hot_label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to2d(test_img)}, label={'one_hot_label' : test_lbl_one_hot}) #test data

    '''neural network'''
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('one_hot_label')

    # first_hidden_layer
    affine1 = mx.sym.FullyConnected(data=data,name='fc1',num_hidden=50)
    hidden1 = mx.sym.Activation(data=affine1, name='sigmoid1', act_type="sigmoid")

    # second hidden layer
    affine2 = mx.sym.FullyConnected(data=hidden1, name='fc2', num_hidden=50)
    hidden2 = mx.sym.Activation(data=affine2, name='sigmoid2', act_type="sigmoid")

    # output_layer
    output_affine = mx.sym.FullyConnected(data=hidden2, name='fc3', num_hidden=10)

    '''
    mx.sym.Custom applies a custom operator implemented in a frontend language.
    A custom operator must override the required methods (forward and backward) and must be
    registered with mx.operator.register before it can be used; see the MXNet tutorial on
    custom operators.

    Parameters : data (NDArray[]) - input data for the custom operator
                 op_type (string) - name of the custom operator; this is the name passed to
                                    mx.operator.register when registering it
                 out (NDArray, optional) - the output NDArray to hold the result

    Returns : out - NDArray or list of NDArrays
    '''

    # custom layer -> for custom losses, MakeLoss is usually the simpler option.
    # Passing the label exactly as shown on the MXNet API page did not work here;
    # I am not sure why, but the call below (with the label as a keyword argument) does work.
    # A sketch of how such a custom operator could be registered appears at the end of this example.
    output = mx.sym.Custom(data=output_affine, label=label, grad_scale=1, name="SoftmaxOutput", op_type='SoftmaxOutput')

    # Visualize the network structure with output sizes (batch_size is ignored).
    shape = {"data": (batch_size, 784)}
    graph = mx.viz.plot_network(symbol=output, shape=shape)  # The diagram can be viewed in the Jupyter notebook.
    if epoch==1:
        graph.view()
    print(output.list_arguments())
    print(output.list_outputs())

    # training mod
    mod = mx.mod.Module(symbol=output, data_names=['data'], label_names=['one_hot_label'], context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data,label_shapes=train_iter.provide_label)

    #load the saved mod data
    weights_path = "weights/mod-{}.params".format(load_weights)

    if os.path.exists(weights_path):
        print("Load weights")
        mod.load_params(weights_path)
    else:
        mod.init_params(initializer=mx.initializer.Xavier(rnd_type='uniform', factor_type='avg', magnitude=1))

    mod.init_optimizer(optimizer='adam',optimizer_params={'learning_rate': 0.001})

    # test mod
    test = mx.mod.Module(symbol=output, data_names=['data'], label_names=['one_hot_label'], context=mx.gpu(0))

    '''load method1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label,shared_module=mod,for_training=False)

    # Network information print
    print(mod.data_names)
    print(mod.label_names)
    print(train_iter.provide_data)
    print(train_iter.provide_label)

    '''############ The following is not strictly required, but it is worth declaring. #################'''

    '''make evaluation method 1 - Using existing ones.
        metrics = {
        'acc': Accuracy,
        'accuracy': Accuracy,
        'ce': CrossEntropy,
        'f1': F1,
        'mae': MAE,
        'mse': MSE,
        'rmse': RMSE,
        'top_k_accuracy': TopKAccuracy
    }'''

    metric = mx.metric.create(['acc','mse'])

    '''make evaluation method 2 - defining a custom metric.'''
    '''
    Custom evaluation metric that takes a NDArray function.
    Parameters:
    •feval (callable(label, pred)) – Customized evaluation function.
    •name (str, optional) – The name of the metric.
    •allow_extra_outputs (bool) – If true, the prediction outputs can have extra outputs.
    This is useful in RNN, where the states are also produced in outputs for forwarding.
    '''

    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    for epoch in range(1,epoch+1,1):
        print("epoch : {}".format(epoch))
        train_iter.reset()
        #total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        #temp=0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            #cost
            #temp+=(mod.get_outputs()[0].asnumpy()-batch.label[0].asnumpy())

        #cost = (0.5*np.square(temp)/(total_batch_number*1.0)).mean()
        result = test.predict(test_iter).asnumpy().argmax(axis=1)
        print("training_data : {}".format(mod.score(train_iter, ['mse', 'acc'])))
        print('accuracy during learning.  : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0))
        #print "cost value : {}".format(cost)

        if not os.path.exists("weights"):
            os.makedirs("weights")

        #Save the data
        if epoch%save_period==0:
            print('Saving weights')
            mod.save_params("weights/mod-{}.params" .format(epoch))

    # Network information print
    print(mod.data_shapes)
    print(mod.label_shapes)
    print(mod.output_shapes)
    print(mod.get_params())
    print(mod.get_outputs())

    print("Optimization complete.")
    #################################TEST####################################
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''Uncomment the line below only when using 'load method2'; with the shared_module approach it is not needed.'''
    #test.set_params(arg_params, aux_params)

    #batch by batch accuracy
    # To use the code below, the test set size must be divisible by batch_size.
    '''for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''
    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print('Final accuracy : {}%' .format(float(sum(test_lbl == result)) / len(result)*100.0))
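

# --- Illustrative sketch (an assumption, not the author's actual operator) ---
# The 'SoftmaxOutput' custom op used above through mx.sym.Custom must have been registered
# somewhere with mx.operator.register. A minimal registration of a softmax loss that takes a
# one-hot label could look roughly like this; the names OneHotSoftmax / OneHotSoftmaxProp and
# the cross-entropy-style backward pass are illustrative assumptions only.
class OneHotSoftmax(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        # softmax over the class axis
        self.assign(out_data[0], req[0], mx.nd.softmax(in_data[0], axis=1))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # gradient of softmax cross-entropy with a one-hot label: probabilities - label
        self.assign(in_grad[0], req[0], out_data[0] - in_data[1])


@mx.operator.register('OneHotSoftmax')
class OneHotSoftmaxProp(mx.operator.CustomOpProp):
    def __init__(self, grad_scale=1.0):
        # keyword arguments arrive from mx.sym.Custom as strings; grad_scale is accepted for
        # compatibility with the call above but not used in this sketch
        super(OneHotSoftmaxProp, self).__init__(need_top_grad=False)
        self.grad_scale = float(grad_scale)

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # the one-hot label has the same shape as the network output
        return [in_shape[0], in_shape[0]], [in_shape[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        return OneHotSoftmax()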
Example #6
def NeuralNet(epoch, batch_size, save_period, tensorboard):
    '''load_data
    1. For SoftmaxOutput, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data

    2. For LogisticRegressionOutput, LinearRegressionOutput, MakeLoss, and so on, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data
    '''

    (train_lbl_one_hot, train_lbl,
     train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                         'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl,
     test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                        't10k-images-idx3-ubyte.gz')
    '''Data loading via the MXNet Data Loading API.'''
    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)},
                                   label={'label': train_lbl_one_hot},
                                   batch_size=batch_size,
                                   shuffle=True)  #training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)},
                                  label={'label':
                                         test_lbl_one_hot})  #test data
    '''neural network'''
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')

    # first convolution layer
    conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=30)
    conv1 = mx.sym.BatchNorm(data=conv1,
                             fix_gamma=False,
                             use_global_stats=True)
    relu1 = mx.sym.Activation(
        data=conv1, name='relu_c1',
        act_type="relu")  # -> size : (batch_size,30,24,24)
    pool1 = mx.sym.Pooling(data=relu1,
                           pool_type="max",
                           kernel=(2, 2),
                           stride=(2, 2))  # -> size : (batch_size,30,12,12)

    # second convolution layer
    conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=60)
    conv2 = mx.sym.BatchNorm(data=conv2,
                             fix_gamma=False,
                             use_global_stats=True)
    relu2 = mx.sym.Activation(data=conv2, name='relu_c2',
                              act_type="relu")  # -> size : (batch_size,60,8,8)
    pool2 = mx.sym.Pooling(data=relu2,
                           pool_type="max",
                           kernel=(2, 2),
                           stride=(2, 2))  # -> size : (batch_size,60,4,4)

    #flatten the data
    flatten = mx.sym.Flatten(data=pool2)

    # first fullyconnected layer
    affine1 = mx.sym.FullyConnected(data=flatten, name='fc1', num_hidden=100)
    affine1 = mx.sym.BatchNorm(data=affine1,
                               fix_gamma=False,
                               use_global_stats=True)
    hidden1 = mx.sym.Activation(data=affine1, name='relu_f1', act_type="relu")

    # second fully connected layer
    affine2 = mx.sym.FullyConnected(data=hidden1, name='fc2', num_hidden=100)
    affine2 = mx.sym.BatchNorm(data=affine2,
                               fix_gamma=False,
                               use_global_stats=True)
    hidden2 = mx.sym.Activation(data=affine2, name='relu_f2', act_type="relu")
    output_affine = mx.sym.FullyConnected(data=hidden2,
                                          name='fc3',
                                          num_hidden=10)

    output = mx.sym.SoftmaxOutput(data=output_affine, label=label)

    # Visualize the network structure with output sizes (batch_size is ignored).
    shape = {"data": (batch_size, 1, 28, 28)}
    mx.viz.plot_network(
        symbol=output,
        shape=shape)  # The diagram can be viewed in the Jupyter notebook.
    print output.list_arguments()

    # training mod
    mod = mx.mod.Module(symbol=output,
                        data_names=['data'],
                        label_names=['label'],
                        context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data,
             label_shapes=train_iter.provide_label)

    #load the saved mod data (only if the checkpoint file actually exists)
    if os.path.exists("weights/mod-100.params"):
        mod.load_params("weights/mod-100.params")
    else:
        mod.init_params(initializer=mx.initializer.Xavier(
            rnd_type='gaussian', factor_type='avg', magnitude=1))
    mod.init_optimizer(optimizer='adam',
                       optimizer_params={'learning_rate': 0.001})

    # test mod
    test = mx.mod.Module(symbol=output,
                         data_names=['data'],
                         label_names=['label'],
                         context=mx.gpu(0))
    '''load method1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data,
              label_shapes=test_iter.provide_label,
              shared_module=mod,
              for_training=False)

    # Network information print
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label
    '''############ The following is not strictly required, but it is worth declaring. #################'''
    '''make evaluation method 1 - Using existing ones.
        metrics = {
        'acc': Accuracy,
        'accuracy': Accuracy,
        'ce': CrossEntropy,
        'f1': F1,
        'mae': MAE,
        'mse': MSE,
        'rmse': RMSE,
        'top_k_accuracy': TopKAccuracy
    }'''

    metric = mx.metric.create(['acc', 'mse'])
    '''make evaluation method 2 - defining a custom metric.'''
    '''
    Custom evaluation metric that takes a NDArray function.
    Parameters:
    •feval (callable(label, pred)) – Customized evaluation function.
    •name (str, optional) – The name of the metric.
    •allow_extra_outputs (bool) – If true, the prediction outputs can have extra outputs.
    This is useful in RNN, where the states are also produced in outputs for forwarding.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)
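    # NOTE (assumption): 'summary_writer' is used in the training loop below but is never created
    # in this snippet. A minimal setup with the dmlc tensorboard package (whose SummaryWriter class
    # is quoted further down) might look like the two lines below; the import path and log
    # directory are assumptions, not part of the original code.
    from tensorboard import SummaryWriter  # assumed import for the dmlc tensorboard logger
    summary_writer = SummaryWriter(log_dir='tensorboard_logs')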

    for epoch in xrange(1, epoch + 1, 1):
        print "epoch : {}".format(epoch)
        train_iter.reset()
        total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        temp = 0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()
            '''accumulate the prediction error for the MSE cost (logged to tensorboard below)'''
            temp += (mod.get_outputs()[0].asnumpy() - batch.label[0].asnumpy())
        cost = (0.5 * np.square(temp) / (total_batch_number * 1.0)).mean()
        print "MSE_cost value : {}".format(cost)

        result = test.predict(test_iter).asnumpy().argmax(axis=1)
        print "training_data : {}".format(mod.score(train_iter,
                                                    ['mse', 'acc']))
        print 'accuracy during learning.  : {}%'.format(
            float(sum(test_lbl == result)) / len(result) * 100.0)
        '''
            class SummaryWriter(object):
        """Writes `Summary` directly to event files.
        The `SummaryWriter` class provides a high-level api to create an event file in a
        given directory and add summaries and events to it. The class updates the
        file contents asynchronously. This allows a training program to call methods
        to add data to the file directly from the training loop, without slowing down
        training.
        """
        def __init__(self, log_dir):
            self.file_writer = FileWriter(logdir=log_dir)
    
        def add_scalar(self, name, scalar_value, global_step=None):
            self.file_writer.add_summary(scalar(name, scalar_value), global_step)
    
        def add_histogram(self, name, values):
            self.file_writer.add_summary(histogram(name, values))
    
        def add_image(self, tag, img_tensor):
            self.file_writer.add_summary(image(tag, img_tensor))
    
        def close(self):
            self.file_writer.flush()
            self.file_writer.close()
    
        def __del__(self):
            if self.file_writer is not None:
                self.file_writer.close()   
        '''
        '''tensorboard_part'''
        if (epoch % tensorboard) == 0:

            arg_params, aux_params = mod.get_params()

            #write scalar values
            summary_writer.add_scalar(name="MSE_cost",
                                      scalar_value=cost,
                                      global_step=epoch)

            for arg_key, arg_value, aux_key, aux_value in zip(
                    arg_params.keys(), arg_params.values(), aux_params.keys(),
                    aux_params.values()):
                #write matrix values
                summary_writer.add_histogram(
                    name=arg_key, values=arg_value.asnumpy().ravel())
                #summary_writer.add_histogram(name=aux_key, values=aux_value.asnumpy().ravel())
                '''or'''
                #summary_writer.add_histogram(name=arg_key, values=arg_value.asnumpy().flatten())
                #summary_writer.add_histogram(name=aux_key, values=aux_value.asnumpy().flatten())

        #Save the data
        if (epoch % save_period) == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print('Saving weights')
            mod.save_params("weights/mod-{}.params".format(epoch))
    '''tensorboard_part'''
    summary_writer.close()

    # Network information print
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()

    print "Optimization complete."
    #################################TEST####################################
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()
    '''Uncomment the line below only when using 'load method2'; with the shared_module approach it is not needed.'''
    #test.set_params(arg_params, aux_params)

    #batch by batch accuracy
    # To use the code below, the test set size must be divisible by batch_size.
    '''for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''
    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(
        float(sum(test_lbl == result)) / len(result) * 100.0)
Example #7
def CapsNet(reconstruction,
            epoch,
            batch_size,
            save_period,
            load_period,
            ctx=mx.gpu(0),
            graphviz=False):

    (train_lbl_one_hot, train_lbl,
     train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                         'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl,
     test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                        't10k-images-idx3-ubyte.gz')
    '''
    In the paper: 'Training is performed on 28x28 MNIST images that have been shifted by up to
    2 pixels in each direction with zero padding.' In this implementation, the original data is
    not transformed in that way.
    '''
    '''Data loading via the MXNet Data Loading API.'''

    train_iter = mx.io.NDArrayIter(
        data={'data': to4d(train_img)},
        label={'label': train_lbl},
        batch_size=batch_size,
        shuffle=True,
        last_batch_handle='roll_over')  #training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)},
                                  label={'label': test_lbl},
                                  batch_size=batch_size,
                                  shuffle=False,
                                  last_batch_handle='roll_over')  #test data
    '''
    reconstruction=True
    output_list[0] -> total_loss=margin_loss+reconstruction_loss
    output_list[1] -> capsule_output
    output_list[2] -> reconstruction_output
    
    reconstruction=False
    output_list[0] -> margin_loss
    output_list[1] -> capsule_output
    '''
    output_list = capsule(reconstruction=reconstruction,
                          routing_iteration=1,
                          batch_size=batch_size)
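    # Unlike the executor-based CapsNet example above, this variant wraps the same symbols in the
    # high-level mx.mod.Module API and lets it handle binding, initialization and the update step.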

    # training mod
    network = mx.mod.Module(symbol=output_list[0],
                            data_names=['data'],
                            label_names=['label'],
                            context=ctx)
    network.bind(data_shapes=train_iter.provide_data,
                 label_shapes=train_iter.provide_label,
                 for_training=True)

    if epoch == 0 and graphviz == True:

        if reconstruction:
            total_loss = mx.viz.plot_network(symbol=output_list[0],
                                             shape={
                                                 "data":
                                                 (batch_size, 1, 28, 28),
                                                 "label": (batch_size, )
                                             })
            total_loss.view("total_loss")
        else:
            margin_loss = mx.viz.plot_network(symbol=output_list[0],
                                              shape={
                                                  "data":
                                                  (batch_size, 1, 28, 28),
                                                  "label": (batch_size, )
                                              })
            margin_loss.view("margin_loss")

    if reconstruction:  #reconstruction==True
        if os.path.exists(
                "weights/MNIST_Reconstruction_weights-{}.param".format(
                    load_period)):
            print("MNIST_Reconstruction_weights-{}.param exists".format(
                load_period))
            network.load_params(
                "weights/MNIST_Reconstruction_weights-{}.param".format(
                    load_period))
        else:
            print("weight initialization")
            network.init_params(initializer=mx.initializer.Normal(sigma=0.1))

    else:  #reconstruction=False
        if os.path.exists(
                "weights/MNIST_weights-{}.param".format(load_period)):
            print("MNIST_weights-{}.param exists".format(load_period))
            network.load_params(
                "weights/MNIST_weights-{}.param".format(load_period))
        else:
            print("weight initialization")
            network.init_params(initializer=mx.initializer.Normal(sigma=0.1))

    if reconstruction:
        capsule_output = mx.mod.Module(symbol=output_list[1],
                                       data_names=['data'],
                                       label_names=None,
                                       context=ctx)
        reconstruction_output = mx.mod.Module(symbol=output_list[2],
                                              data_names=['data'],
                                              label_names=['label'],
                                              context=ctx)

        capsule_output.bind(data_shapes=test_iter.provide_data,
                            label_shapes=None,
                            for_training=False,
                            shared_module=network,
                            grad_req='null')
        reconstruction_output.bind(data_shapes=test_iter.provide_data,
                                   label_shapes=test_iter.provide_label,
                                   for_training=False,
                                   shared_module=network,
                                   grad_req='null')
    else:
        capsule_output = mx.mod.Module(symbol=output_list[1],
                                       data_names=['data'],
                                       label_names=None,
                                       context=ctx)
        capsule_output.bind(data_shapes=test_iter.provide_data,
                            label_shapes=None,
                            for_training=False,
                            shared_module=network,
                            grad_req='null')

    lr_sch = mx.lr_scheduler.FactorScheduler(step=5000, factor=0.99)
    network.init_optimizer(optimizer='adam',
                           optimizer_params={
                               'learning_rate': 0.001,
                               'lr_scheduler': lr_sch
                           })

    if not os.path.exists("weights"):
        os.makedirs("weights")

    # learning
    for i in tqdm(range(1, epoch + 1, 1)):

        train_iter.reset()
        for batch in train_iter:
            network.forward(batch)
            out_grads = network.get_outputs()
            network.backward(out_grads=out_grads)
            network.update()

        if reconstruction:
            print("epoch : {}, last total loss : {}".format(
                i,
                mx.nd.mean(network.get_outputs()[0]).asscalar()))
            if i % save_period == 0:
                print('Saving weights')
                network.save_params(
                    "weights/MNIST_Reconstruction_weights-{}.param".format(i))
        else:
            print("epoch : {}, last margin loss : {}".format(
                i,
                mx.nd.mean(network.get_outputs()[0]).asscalar()))
            if i % save_period == 0:
                print('Saving weights')
                network.save_params("weights/MNIST_weights-{}.param".format(i))

        test_accuracy = evaluate_accuracy(test_iter, capsule_output)
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    print("Optimization complete\n")

    test_accuracy = evaluate_accuracy(test_iter, capsule_output)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    if reconstruction:
        generate_image(test_iter, reconstruction_output)
Example #8
def NeuralNet(epoch, batch_size, save_period):
    '''
    load_data

    1. For SoftmaxOutput, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data

    2. For LogisticRegressionOutput, LinearRegressionOutput, MakeLoss, and so on, the iterators should be:

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data
    '''
    '''In this Autoencoder tutorial, we don't need the label data.'''
    (_, _, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                               'train-images-idx3-ubyte.gz')
    (_, _, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                              't10k-images-idx3-ubyte.gz')
    '''Data loading via the MXNet Data Loading API.'''
    train_iter = mx.io.NDArrayIter(data={'input': to2d(train_img)},
                                   label={'input_': to2d(train_img)},
                                   batch_size=batch_size,
                                   shuffle=True)  #training data
    test_iter = mx.io.NDArrayIter(data={'input': to2d(test_img)},
                                  label={'input_': to2d(test_img)})  #test data
    '''Autoencoder network

    <structure>
    input - encode - middle - decode -> output
    '''
    input = mx.sym.Variable('input')
    output = mx.sym.Variable('input_')

    # encode
    affine1 = mx.sym.FullyConnected(data=input, name='encode', num_hidden=100)
    encode1 = mx.sym.Activation(data=affine1,
                                name='sigmoid1',
                                act_type="sigmoid")

    # middle
    affine2 = mx.sym.FullyConnected(data=encode1, name='middle', num_hidden=50)
    middle = mx.sym.Activation(data=affine2,
                               name='sigmoid2',
                               act_type="sigmoid")

    # decode
    affine3 = mx.sym.FullyConnected(data=middle, name='decode', num_hidden=100)
    decode1 = mx.sym.Activation(data=affine3,
                                name='sigmoid3',
                                act_type="sigmoid")

    # output
    result = mx.sym.FullyConnected(data=decode1, name='result', num_hidden=784)

    # LogisticRegressionOutput applies the sigmoid internally; as noted at the top, it is used with
    # real-valued (one-hot style) labels - here the target is the input image itself.
    result = mx.sym.LogisticRegressionOutput(data=result, label=output)

    # Visualize the network structure with output sizes (batch_size is ignored).
    shape = {"input": (batch_size, 784)}
    mx.viz.plot_network(
        symbol=result,
        shape=shape)  # The diagram can be viewed in the Jupyter notebook.
    print result.list_arguments()

    # First optimization method
    # weights save

    model_name = 'weights/Autoencoder'
    checkpoint = mx.callback.do_checkpoint(model_name, period=save_period)

    #training mod
    mod = mx.mod.Module(symbol=result,
                        data_names=['input'],
                        label_names=['input_'],
                        context=mx.gpu(0))

    #test mod
    test = mx.mod.Module(symbol=result,
                         data_names=['input'],
                         label_names=['input_'],
                         context=mx.gpu(0))

    # Network information print
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label
    '''mod.fit below binds the module itself, so this call is not strictly required;
    however, when loading saved weights before fitting, the module must be bound first, as done here.'''
    mod.bind(data_shapes=train_iter.provide_data,
             label_shapes=train_iter.provide_label)

    #weights load

    # To resume from saved weights, the two statements below load checkpoint 100 and copy its
    # parameters into the module. Comment them out for a fresh training run.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)

    # set_params requires mod.bind to have been called first. If arg_params and aux_params are
    # passed to mod.fit instead, neither this call nor mod.bind is needed.
    mod.set_params(arg_params, aux_params)
    '''To modify the learning process in detail, look inside the mod.fit() function.'''

    mod.fit(
        train_iter,
        initializer=mx.initializer.Xavier(rnd_type='gaussian',
                                          factor_type="avg",
                                          magnitude=1),
        optimizer='adam',  #optimizer
        optimizer_params={'learning_rate': 0.001},  #learning rate
        eval_metric=mx.metric.MSE(),
        # If the loaded arg_params/aux_params are passed here instead, mod.set_params and mod.bind are not needed.
        arg_params=None,
        aux_params=None,
        num_epoch=epoch,
        epoch_end_callback=checkpoint)

    # Network information print
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()

    print "training_data : {}".format(mod.score(train_iter, ['mse']))

    print "Optimization complete."

    #################################TEST####################################
    '''load method1 - load the saved parameter'''
    #symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()
    '''load method3 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data,
              label_shapes=test_iter.provide_label,
              shared_module=mod,
              for_training=False)
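    # Because the test module is bound with shared_module=mod, it reuses mod's parameter arrays,
    # so weights learned by mod are visible to test without an explicit set_params call.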
    '''Uncomment set_params below only when using 'load method1' or 'load method2'; with the shared_module binding it is unnecessary.'''
    #test.set_params(arg_params, aux_params)
    '''test'''
    column_size = 10
    row_size = 10  #     column_size x row_size <= 10000

    result = test.predict(test_iter,
                          num_batch=column_size * row_size).asnumpy()
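    # test_iter was built without an explicit batch_size (NDArrayIter defaults to 1),
    # so num_batch=100 yields the first column_size * row_size reconstructions.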
    '''range adjustment 0 ~ 1 -> 0 ~ 255 '''
    result = result * 255.0
    '''generator image visualization'''
    fig_g, ax_g = plt.subplots(row_size,
                               column_size,
                               figsize=(column_size, row_size))
    fig_g.suptitle('generator')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_g[j][i].set_axis_off()
            ax_g[j][i].imshow(np.reshape(result[i + j * column_size],
                                         (28, 28)),
                              cmap='gray')

    fig_g.savefig("generator.png")
    '''real image visualization'''
    fig_r, ax_r = plt.subplots(row_size,
                               column_size,
                               figsize=(column_size, row_size))
    fig_r.suptitle('real')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_r[j][i].set_axis_off()
            ax_r[j][i].imshow(test_img[i + j * column_size], cmap='gray')
    fig_r.savefig("real.png")

    plt.show()
Example #9
0
def NeuralNet(epoch, batch_size, save_period):
    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data

    2. LogisticRegressionOutput , LinearRegressionOutput , MakeLoss and so on.. must be

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data
    '''
    '''In this Autoencoder tutorial, we don't need the label data.'''
    (_, _, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz',
                                               'train-images-idx3-ubyte.gz')
    (_, _, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz',
                                              't10k-images-idx3-ubyte.gz')
    '''data loading referenced by Data Loading API '''
    train_iter = mx.io.NDArrayIter(data={'input': to2d(train_img)},
                                   label={'input_': to2d(train_img)},
                                   batch_size=batch_size,
                                   shuffle=True)  #training data
    test_iter = mx.io.NDArrayIter(data={'input': to2d(test_img)},
                                  label={'input_': to2d(test_img)})  #test data
    '''Autoencoder network

    <structure>
    input - encode - middle - decode -> output
    '''
    input = mx.sym.Variable('input')
    output = mx.sym.Variable('input_')

    # encode
    affine1 = mx.sym.FullyConnected(data=input, name='encode', num_hidden=100)
    encode1 = mx.sym.Activation(data=affine1,
                                name='sigmoid1',
                                act_type="sigmoid")

    # middle
    affine2 = mx.sym.FullyConnected(data=encode1, name='middle', num_hidden=50)
    middle = mx.sym.Activation(data=affine2,
                               name='sigmoid2',
                               act_type="sigmoid")

    # decode
    affine3 = mx.sym.FullyConnected(data=middle, name='decode', num_hidden=100)
    decode1 = mx.sym.Activation(data=affine3,
                                name='sigmoid3',
                                act_type="sigmoid")

    # output
    result = mx.sym.FullyConnected(data=decode1, name='result', num_hidden=784)

    # LogisticRegressionOutput applies a sigmoid internally, so the target must already lie in the 0~1 range (here the target is the 0~1 pixel values of the input image itself).
    result = mx.sym.LogisticRegressionOutput(data=result, label=output)

    # Visualize the network structure with output sizes (the batch_size value does not affect the structure.)
    shape = {"input": (batch_size, 784)}
    mx.viz.plot_network(
        symbol=result,
        shape=shape)  #The diagram can be viewed in the Jupyter notebook.
    print result.list_arguments()

    #training mod
    mod = mx.mod.Module(symbol=result,
                        data_names=['input'],
                        label_names=['input_'],
                        context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data,
             label_shapes=train_iter.provide_label)

    #load the saved module parameters
    mod.load_params("weights/mod-100.params")

    mod.init_params(initializer=mx.initializer.Xavier(
        rnd_type='uniform', factor_type='avg', magnitude=3))
    mod.init_optimizer(optimizer='adam',
                       optimizer_params={'learning_rate': 0.01})

    #test mod
    test = mx.mod.Module(symbol=result,
                         data_names=['input'],
                         label_names=['input_'],
                         context=mx.gpu(0))
    '''load method1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data,
              label_shapes=test_iter.provide_label,
              shared_module=mod,
              for_training=False)

    # Network information print
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label
    '''############ The following evaluation metrics are optional; they are defined for reference and not used in the loop below. #################'''
    '''make evaluation method 1 - Using existing ones.
        metrics = {
        'acc': Accuracy,
        'accuracy': Accuracy,
        'ce': CrossEntropy,
        'f1': F1,
        'mae': MAE,
        'mse': MSE,
        'rmse': RMSE,
        'top_k_accuracy': TopKAccuracy
    }'''

    metric = mx.metric.create(['acc', 'mse'])
    '''make evaluation method 2 - Making new things.'''
    '''
    Custom evaluation metric that takes a NDArray function.
    Parameters:
    •feval (callable(label, pred)) – Customized evaluation function.
    •name (str, optional) – The name of the metric.
    •allow_extra_outputs (bool) – If true, the prediction outputs can have extra outputs.
    This is useful in RNN, where the states are also produced in outputs for forwarding.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)
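    # Usage sketch (not exercised in the loop below): a CustomMetric is fed lists of label/prediction
    # NDArrays and then queried, e.g.
    #   null.update(labels=[label_batch], preds=[pred_batch])
    #   print null.get()   # -> (metric name, accumulated value)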

    for epoch in xrange(1, epoch + 1, 1):
        print "epoch : {}".format(epoch)
        train_iter.reset()
        #total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        #temp=0
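        # Manual training loop: forward computes the reconstruction, backward the gradients,
        # and update applies one Adam step per batch (replacing what mod.fit does internally).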
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            #cost
            #temp+=(mod.get_outputs()[0].asnumpy()-batch.data[0].asnumpy())

        print "training_data : {}".format(mod.score(train_iter, ['mse']))
        #cost = (0.5*np.square(temp)/(total_batch_number*1.0)).mean()
        #print "cost value : {}".format(cost)

        #Save the data
        if epoch % save_period == 0:
            print('Saving weights')
            mod.save_params("weights/mod-{}.params".format(epoch))

    # Network information print
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "Optimization complete."

    #################################TEST####################################
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()
    '''Uncomment set_params below only when using 'load method2'; with the shared_module binding it is unnecessary.'''
    #test.set_params(arg_params, aux_params)
    '''test'''
    column_size = 10
    row_size = 10  #     column_size x row_size <= 10000

    result = test.predict(test_iter,
                          num_batch=column_size * row_size).asnumpy()
    '''range adjustment 0 ~ 1 -> 0 ~ 255 '''
    result = result * 255.0
    '''generator image visualization'''
    fig_g, ax_g = plt.subplots(row_size,
                               column_size,
                               figsize=(column_size, row_size))
    fig_g.suptitle('generator')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_g[j][i].set_axis_off()
            ax_g[j][i].imshow(np.reshape(result[i + j * column_size],
                                         (28, 28)),
                              cmap='gray')

    fig_g.savefig("generator.png")
    '''real image visualization'''
    fig_r, ax_r = plt.subplots(row_size,
                               column_size,
                               figsize=(column_size, row_size))
    fig_r.suptitle('real')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_r[j][i].set_axis_off()
            ax_r[j][i].imshow(test_img[i + j * column_size], cmap='gray')
    fig_r.savefig("real.png")

    plt.show()
Example #10
0
def NeuralNet(epoch,batch_size,save_period):

    time_step=28
    hidden_unit_number1 = 100
    hidden_unit_number2 = 100
    fc_number=100
    class_number=10
    use_cudnn = True

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl}, batch_size=batch_size) #test data
                                                                or
    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data

    2. LogisticRegressionOutput , LinearRegressionOutput , MakeLoss and so on.. must be

    train_iter = mx.io.NDArrayIter(data={'data' : to4d(train_img)},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : to4d(test_img)}, label={'label' : test_lbl_one_hot}, batch_size=batch_size) #test data

    '''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz','train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API '''
    train_iter = mx.io.NDArrayIter(data={'data' : train_img},label={'label' : train_lbl_one_hot}, batch_size=batch_size, shuffle=True) #training data
    test_iter   = mx.io.NDArrayIter(data={'data' : test_img}, label={'label' : test_lbl_one_hot}) #test data

    ####################################################-Network-################################################################
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    data = mx.sym.transpose(data, axes=(1, 0, 2))  # (time,batch,column)
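    # The RNN cells below are unrolled with layout='TNC' (time, batch, feature),
    # hence the transpose from the iterator's (batch, time, feature) layout.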

    '''1. RNN cell declaration'''

    '''
    Fusing RNN layers across time step into one kernel.
    Improves speed but is less flexible. Currently only
    supported if using cuDNN on GPU.
    '''

    if use_cudnn:#faster
        lstm1 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number1, mode="lstm", prefix="lstm1_",get_next_state=True)
        lstm2 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number2, mode="lstm", prefix="lstm2_",get_next_state=True)
    else:
        lstm1 = mx.rnn.LSTMCell(num_hidden=hidden_unit_number1, prefix="lstm1_")
        lstm2 = mx.rnn.LSTMCell(num_hidden=hidden_unit_number2, prefix="lstm2_")

    '''2. Unroll the RNN CELL on a time axis.'''

    ''' unroll's return parameter
    outputs : list of Symbol
              output symbols.
    states : Symbol or nested list of Symbol
            has the same structure as begin_state()

    '''
    #if you see the unroll function
    layer1, state1= lstm1.unroll(length=time_step, inputs=data, merge_outputs=True, layout='TNC')
    layer1 = mx.sym.Dropout(layer1, p=0.3)
    layer2, state2 = lstm2.unroll(length=time_step, inputs=layer1, merge_outputs=True,layout="TNC")
    rnn_output= mx.sym.Reshape(state2[-1], shape=(-1,hidden_unit_number1))
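    # With merge_outputs=True each unroll returns its outputs as a single (time, batch, hidden) symbol;
    # state2[-1] takes the last returned state as a fixed-size summary fed to the classifier below.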

    '''FullyConnected Layer'''
    affine1 = mx.sym.FullyConnected(data=rnn_output, num_hidden=fc_number, name='affine1')
    act1 = mx.sym.Activation(data=affine1, act_type='sigmoid', name='sigmoid1')
    affine2 = mx.sym.FullyConnected(data=act1, num_hidden=class_number, name = 'affine2')
    output = mx.sym.SoftmaxOutput(data=affine2, label=label, name='softmax')


    # We visualize the network structure with output size (the batch_size value does not affect the structure.)
    shape = {"data": (time_step,batch_size,28)}
    mx.viz.plot_network(symbol=output,shape=shape)#The diagram can be viewed in the Jupyter notebook.
    print output.list_arguments()

    # training mod

    mod = mx.module.Module(symbol = output , data_names=['data'], label_names=['label'], context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data,label_shapes=train_iter.provide_label)

    #load the saved module parameters
    mod.load_params("weights/Neural_Net-100.params")

    mod.init_params(initializer=mx.initializer.Xavier(rnd_type='gaussian', factor_type='avg', magnitude=1))
    mod.init_optimizer(optimizer='adam',optimizer_params={'learning_rate': 0.001})
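    # load_params restores only the network weights; init_optimizer creates fresh Adam state,
    # so optimizer statistics start from zero when resuming from a checkpoint.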

    # test mod
    test = mx.mod.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))

    '''load method1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label,shared_module=mod,for_training=False)

    # Network information print
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''############ The following evaluation metrics are optional; they are defined for reference and not used in the loop below. #################'''

    '''make evaluation method 1 - Using existing ones.
        metrics = {
        'acc': Accuracy,
        'accuracy': Accuracy,
        'ce': CrossEntropy,
        'f1': F1,
        'mae': MAE,
        'mse': MSE,
        'rmse': RMSE,
        'top_k_accuracy': TopKAccuracy
    }'''

    metric = mx.metric.create(['acc','mse'])
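    # Passing a list to create() returns a composite metric tracking both accuracy and MSE
    # (defined here for reference; the manual loop below does not call it).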

    '''make evaluation method 2 - Making new things.'''
    '''
    Custom evaluation metric that takes a NDArray function.
    Parameters:
    •feval (callable(label, pred)) – Customized evaluation function.
    •name (str, optional) – The name of the metric.
    •allow_extra_outputs (bool) – If true, the prediction outputs can have extra outputs.
    This is useful in RNN, where the states are also produced in outputs for forwarding.
    '''

    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    for epoch in xrange(1,epoch+1,1):
        print "epoch : {}".format(epoch)
        train_iter.reset()
        #total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        #temp=0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            #cost
            #temp+=(mod.get_outputs()[0].asnumpy()-batch.label[0].asnumpy())

        #cost = (0.5*np.square(temp)/(total_batch_number*1.0)).mean()
        result = test.predict(test_iter).asnumpy().argmax(axis=1)
        print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
        print 'accuracy during learning : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)
        #print "cost value : {}".format(cost)

        #Save the data
        if epoch%save_period==0:
            print('Saving weights')
            mod.save_params("weights/Neural_Net" .format(epoch))

    # Network information print
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()

    print "Optimization complete."
    #################################TEST####################################
    '''load method2 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''Uncomment set_params below only when using 'load method2'; with the shared_module binding it is unnecessary.'''
    #test.set_params(arg_params, aux_params)

    #batch by batch accuracy
    #To use the code below, the number of test samples must be divisible by batch_size.
    '''for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''
    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)