Example #1
    @classmethod
    def setUpClass(cls):
        print("Loading data...")
        # Loads the MNIST dataset.
        traindata = load_mnist(
            'data/mnist/train-images.idx3-ubyte',
            'data/mnist/train-labels.idx1-ubyte'
        )
        # Choose a validation dataset.
        validation_size = 5000
        perm = np.random.permutation(len(traindata))
        traindata.shuffle(perm)
        validsamples = []
        trainsamples = []
        trainlabels = traindata.get_labels()
        validlabels = trainlabels[0:validation_size]
        trainlabels = trainlabels[validation_size:]
        i = 0

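        # The first validation_size shuffled samples go to the validation set;
        # the rest stay in the training set.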
        for sample in traindata:
            if i < validation_size:
                validsamples.append(sample)
            else:
                trainsamples.append(sample)
            i += 1
        cls.traindata = IdentityDataset(trainsamples, trainlabels)
        cls.validdata = IdentityDataset(validsamples, validlabels)
        
        cls.testdata = load_mnist(
            'data/mnist/t10k-images.idx3-ubyte',
            'data/mnist/t10k-labels.idx1-ubyte'
        )
Example #2
def test_load_test_mnist():
    mnist = load_mnist()

    assert mnist['num_inputs'] == 28 * 28
    assert mnist['num_outputs'] == 10

    assert mnist['partitions']['test'][0] == 60000
    assert max(mnist['partitions']['test']) == 69999

    print('train len', len(mnist['partitions']['training']))

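    # The first four MNIST training labels are 5, 0, 4 and 1; each output is
    # a one-hot row vector, so .index(1) recovers the digit.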
    assert mnist['data'][0]['output'].A[0].tolist().index(1) == 5
    assert mnist['data'][1]['output'].A[0].tolist().index(1) == 0
    assert mnist['data'][2]['output'].A[0].tolist().index(1) == 4
    assert mnist['data'][3]['output'].A[0].tolist().index(1) == 1
Example #3
def load_data(mode):
    (x_train, y_train), (x_test, y_test) = load_mnist(mode)
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)

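    # Flatten each image into a vector, then truncate both splits so their
    # length is an exact multiple of the batch size.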
    x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
    x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    batch_size = params.batch_size
    cut = batch_size * (x_train.shape[0] // batch_size)
    x_train = x_train[:cut]
    y_train = y_train[:cut]
    cut = batch_size * (x_test.shape[0] // batch_size)
    x_test = x_test[:cut]
    y_test = y_test[:cut]
    return (x_train, y_train), (x_test, y_test)
Example #4
    def get_classifier(self) -> CharacterClassifier:
        if self.__classifier:
            return self.__classifier

        self.__classifier = CharacterClassifier(self.__lbls)

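        # Reuse previously trained weight matrices when a database file
        # exists; otherwise build the training set and train from scratch.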
        if os.path.isfile(self.__db_path):
            logger.info('Loading database...')
            with open(self.__db_path, 'rb') as file:
                self.__classifier.matricesPoids = pickle.load(file)
            logger.info('Done.')
        else:
            start_time = time.process_time()

            logger.info('Creating database')
            logger.info('Loading dataset...')

            if self.__mode:
                ds = dataset.load_ttf('dataset/OpenSans-Bold.ttf', self.__lbls)
            else:
                ds = dataset.load_mnist(
                    'dataset/train_img_mnist.dat', 'dataset/lbl_mnist.dat',
                    dict((i + 1, self.__lbls[i])
                         for i in range(len(self.__lbls))))

            ds = dataset.list_to_dict(ds)
            self.__classifier.donnee_ent = ds
            logger.info('Done. (' + str(time.process_time() - start_time) +
                        ' seconds)')
            self.__classifier.train_liste()

            with open(self.__db_path, 'wb') as file:
                pickle.dump(self.__classifier.matricesPoids, file)

            logger.info('Database created in ' +
                        str(time.process_time() - start_time) + ' seconds.')

        return self.__classifier
Example #5
def main(args):
    s_train, s_test = dataset.load_svhn()
    t_train, t_test = dataset.load_mnist()

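    # Iterators over the source (SVHN) and target (MNIST) domains.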
    s_train_iter = SerialIterator(
        s_train, args.batchsize, shuffle=True, repeat=True)
    t_train_iter = SerialIterator(
        t_train, args.batchsize, shuffle=True, repeat=True)
    s_test_iter = SerialIterator(
        s_test, args.batchsize, shuffle=False, repeat=False)
    t_test_iter = SerialIterator(
        t_test, args.batchsize, shuffle=False, repeat=False)

    model = drcn.DRCN()
    target_model = LossAndAccuracy(model)
    loss_list = ['loss_cla_s', 'loss_cla_t', 'loss_rec']
    optimizer = chainer.optimizers.RMSprop(args.lr)
    optimizer.setup(model)
    optimizers = {
        'model': optimizer
    }

    updater = Updater(s_train_iter, t_train_iter, optimizers, args)
    out_dir = utils.prepare_dir(args)
    trainer = Trainer(updater, (args.max_iter, 'iteration'), out=out_dir)
    trainer.extend(extensions.LogReport(trigger=(args.interval, args.unit)))
    trainer.extend(
        extensions.snapshot_object(model, filename='model'),
        trigger=MaxValueTrigger('acc_t', (args.interval, args.unit)))
    trainer.extend(extensions.Evaluator(t_test_iter, target_model,
                                        device=args.device), trigger=(args.interval, args.unit))
    trainer.extend(extensions.PrintReport([args.unit, *loss_list, 'acc_s', 'acc_t', 'elapsed_time']))
    trainer.extend(extensions.PlotReport([*loss_list], x_key=args.unit, file_name='loss.png', trigger=(args.interval, args.unit)))
    trainer.extend(extensions.PlotReport(['acc_s', 'acc_t'], x_key=args.unit, file_name='accuracy.png', trigger=(args.interval, args.unit)))
    trainer.extend(extensions.ProgressBar(update_interval=1))
    trainer.run()
Example #6
import dataset
import numpy as np
import matplotlib.pyplot as plt

# load data
X, y = dataset.load_mnist()

size = y.size
X_n = np.zeros([size, 784])
y_n = np.zeros([size, 1])

# split to train and test
train_ratio = 0.4

# train set
train_num = int(train_ratio * size)
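# Draw train_num random indices (with replacement, so duplicates are possible)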
rand = np.random.random([train_num]) * size
train_ind = rand.astype(int)
X_tr = X[train_ind, :]
y_tr = y[train_ind]

# test set
total = range(size)
test_ind = np.array(list(set(total) - set(train_ind)))
X_te = X[test_ind, :]
y_te = y[test_ind]

print("train size", X_tr.shape, y_tr.shape)
print("test size", X_te.shape, y_te.shape)

Example #7
def get_data():
    # (training images, training labels), (test images, test labels)
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      flatten=True,
                                                      one_hot_label=False)
    return x_train, t_train, x_test, t_test
Example #8
# Mini-batch
# A subset of the data is sampled and used as an approximation of the whole.
# Neural network training likewise picks out only part of the training data
# for each step; this subset is called a mini-batch.
# Training on randomly sampled subsets is called mini-batch learning.

import sys, os
import numpy as np
from dataset import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)
print(x_train.shape)
print(t_train.shape)

# Get the number of training samples from x_train's shape
train_size = x_train.shape[0]
batch_size = 10
# Randomly draw batch_size indices from the range [0, train_size)
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
Example #9
from engine import Engine
from dataset import load_mnist
import argparse
import sys

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true', help='Run training')
    parser.add_argument('--test',  action='store_true', help='Run testing')
    args = parser.parse_args(sys.argv[1:])

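    # Build the engine around the MNIST splits and dispatch on the CLI flags.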
    datasets = load_mnist()
    engine = Engine(datasets)
    if args.train:
        engine.init_engine()
        engine.train(restore_checkpoint=True)
    elif args.test:
        engine.init_engine(is_training=False)
        engine.test()
Example #10
    ax2.set_ylabel('accuracy', fontsize=23)
    ax.set_xlabel('epoch', fontsize=23)

    Ls = L1 + L2 + L3 + L4
    labs = [l.get_label() for l in Ls]
    leg = plt.legend(Ls, labs, loc='lower left', fontsize=18, frameon=True)
    leg.get_frame().set_edgecolor('k')
    leg.get_frame().set_linewidth(2)

    fname = 'epoch_{0}.png'.format(n_epochs)
    fname = os.path.join(dirpath, fname)
    fig.savefig(fname, dpi=fig.dpi)
    plt.close(fig)

def plot_rbm_filters(W):
    plt.figure(figsize=(12, 12))
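    # Each column of W holds one hidden unit's weights; reshape it to 28x28
    # to display it as a filter image.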
    for i in range(64):
        filt = W[:, i].reshape((28, 28))
        plt.subplot(8, 8, i + 1)
        plt.imshow(filt, cmap=plt.cm.gray_r, interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
    plt.suptitle('First 64 components extracted by RBM', fontsize=24)


if __name__ == '__main__':
    from dataset import load_mnist
    X, y = load_mnist(mode='train', path='../../data/')
    plot_greyscale_image(X[0]/255., title='Label is {0}'.format(y[0]))
    plt.show()
Example #11
def get_data():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      flatten=True,
                                                      one_hot_label=False)
    return x_test, t_test
Example #12
def run(shape,
        learning_rate=.1,
        corruption_rate=0,
        decay_rate=0,
        hidden_epochs=100,
        seed=0,
        init_with='random'):
    random.seed(seed)
    numpy.random.seed(seed)

    start_time = time.time()

    print('Running mnist with:', )
    print('\tshape:', shape)
    print('\tlearning_rate:', learning_rate)
    print('\tcorruption_rate:', corruption_rate)
    print('\tdecay_rate:', decay_rate)
    print('\thidden_epochs:', hidden_epochs)
    print('\tseed:', seed)
    print('\tinit_with:', init_with)

    print('load dataset')
    mnist = load_mnist()

    data_loaded_at_time = time.time()

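    # Optionally pre-train the weights with a stacked auto-encoder ('sac');
    # otherwise start from random weights.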
    if init_with == 'sac':
        print('initialize with sac')
        initial_net = stacked_auto_encoder(
            mnist,
            shape,
            learning_rate,
            hidden_epochs=hidden_epochs,
            corruption_rate=corruption_rate,
            decay_rate=decay_rate,
        )
    else:
        initial_net = random_weights(mnist, shape)

    print('Initial net %: ', evaluate_net(mnist, initial_net, 'test') * 100)

    initialization_done_time = time.time()

    # Now train it on the actual data
    refined_net = train(
        mnist,
        initial_net,
        learning_rate,
        # This is the max number of epochs to allow.  With patience I've
        #   never come close to hitting this.
        1000,
    )

    print('After refinement %: ',
          evaluate_net(mnist, refined_net, 'test') * 100)

    end_time = time.time()

    print('start time:', start_time)
    print('data done loading:', data_loaded_at_time)
    print('init done at:', initialization_done_time)
    print('end time:', end_time)
    print('total elapsed time:', end_time - start_time)
Example #13
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

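        # Re-bind the loaded weight and bias arrays to their layers.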
        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]


# Actual training procedure
# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

max_epochs = 20

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={
                            'filter_num': 30,
                            'filter_size': 5,
                            'pad': 0,
                            'stride': 1
                        },
                        hidden_size=100,
                        output_size=10,
                        weight_init_std=0.01)

trainer = Trainer(network,
Example #14
              y_train,
              batch_size=128,
              epochs=50,
              verbose=2,
              validation_data=(x_test, y_test))
    acc = 100 * model.evaluate(x_test, y_test, verbose=0)[1]
    print('Test error:', 100 - acc)

    if pool_type != 'max':
        acc1 = 100 * model.evaluate(
            resize_images(x_test, scale=0.8), y_test, verbose=0)[1]
        print("Test error (scale=0.8): ", 100 - acc1)


if __name__ == '__main__':
    x_train, y_train, x_test, y_test = load_mnist()

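    # Compare pooling strategies (max, global max pooling, SPP) across
    # different filter counts.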
    # Baseline model
    evaluate_model('max', n_filters=64)
    evaluate_model('max', n_filters=32)
    evaluate_model('max', n_filters=4)

    # Global Max Pooling
    evaluate_model('gmp', n_filters=16)
    evaluate_model('gmp', n_filters=24)
    evaluate_model('gmp', n_filters=64)
    evaluate_model('gmp', n_filters=128)

    # SPP
    evaluate_model('spp', n_filters=8)
    evaluate_model('spp', n_filters=16)
Example #15
# coding: utf-8
import sys, os
import numpy as np
from dataset import load_mnist
from PIL import Image


def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()


# (training images, training labels), (test images, test labels)
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True,
                                                  normalize=False)

img = x_train[0]
label = t_train[0]
print(label)  # 5

print(img.shape)  # (784,)
img = img.reshape(28, 28)  # reshape back to the original image size
print(img.shape)  # (28, 28)

img_show(img)
Example #16
from tqdm import *
import numpy

import dataset
from nets import simpleNet


mnist = dataset.load_mnist()

nn = simpleNet(architecture=numpy.array([784, 100, 10]))

for epoch in range(10):
	success = numpy.zeros(shape=(mnist[0][0].shape[0],))
	for example in tqdm(range(mnist[0][0].shape[0])):  #
		
		input = mnist[0][0][example,:]
		target = mnist[0][1][example]
		
		output = nn.forward(input)

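		# Reward-style update: a small negative reward for a wrong prediction,
		# a small positive reward (and a success count) when correct.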
		reward = -0.00001
		if output == target:
			reward = 0.0001
			success[example] += 1

		nn.backward(reward)

	print(numpy.sum(success) / success.shape[0])
Example #17
import os
import sys
sys.path.append(os.pardir)  # setting so that files in the parent directory can be imported
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from dataset import load_mnist
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the training data to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Set whether to use Dropout, and its ratio ==========================
use_dropout = True  # set to False to disable Dropout
dropout_ratio = 0.2
# ====================================================

network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                              output_size=10, use_dropout=use_dropout, dropout_ration=dropout_ratio)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=301, mini_batch_size=100,
                  optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
Example #18
    if dataset == "imagenet":
        data_iter = ds.load_imagenet_rec(batch_size, shp[2])
        def data_iter_func():
            data = data_iter.next()
            return data.data[0], data.label[0]
    elif dataset == "voc":
        val_data = ds.load_voc(batch_size, shp[2])
        data_iter = iter(val_data)
        def data_iter_func():
            return next(data_iter)
    elif dataset == "trec":
        data_iter = ds.load_trec(batch_size)
        def data_iter_func():
            return next(data_iter)
    elif dataset == "mnist":
        val_loader = ds.load_mnist(batch_size)
        data_iter = iter(val_loader)
        def data_iter_func():
            return next(data_iter)
    elif dataset == "quickdraw":
        val_data = ds.load_quickdraw10(batch_size)
        data_iter = iter(val_data)
        def data_iter_func():
            return next(data_iter)
    else:
        assert False, "dataset:%s is not supported" % (dataset)

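    # Load the serialized symbol and parameters, then prepare them for
    # quantization.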
    inputs = [mx.sym.var("data")]
    sym, params = mx.sym.load(sym_file), nd.load(prm_file)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)