示例#1
0
文件: main.py 项目: APinCan/DenseNet
def main():
    """Build a DenseNet, then train it on CIFAR-10 with SGD and step LR decay.

    The data loaders come from ``load_CIFAR10()``, the model is moved to the
    module-level ``device`` and the actual loop is delegated to ``training``.
    The learning rate starts at 0.1 and is multiplied by 0.1 at 50% and 75%
    of ``training_epochs``.
    """
    # Configurations listed for DenseNet: {L=40, K=12}, {L=100, K=12}, {L=100, K=24}
    # Configurations listed for DenseNet-BC: {L=100, K=12}, {L=250, K=24}, {L=190, K=40}
    print('>>> load data')
    train_loader, test_loader = load_CIFAR10()

    L = 250
    k = 24
    training_epochs = 300

    print('>>> make model')
    densenet = DenseNet(L=L, k=k)
    print(densenet)
    densenet = densenet.to(device)

    # NOTE(review): the original author was not sure SGD is the right optimizer.
    optimizer = optim.SGD(densenet.parameters(),
                          weight_decay=0.001,
                          momentum=0.9,
                          lr=0.1,
                          nesterov=True)
    # Milestones must be increasing. The previous (50%, 25%) pair was out of
    # order and decayed at 25% instead of 75%; use the 50% / 75% schedule.
    milestones = [int(training_epochs * 0.5), int(training_epochs * 0.75)]
    scheduler = lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=milestones,
        gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    print(">>> training start")
    training(model=densenet,
             epochs=training_epochs,
             train_loader=train_loader,
             scheduler=scheduler,
             optimizer=optimizer,
             criterion=criterion)
示例#2
0
def CIFAR10_test():
    """Load CIFAR-10, feed it to an ``NN3`` instance and visualize the result.

    The network is created with ``NN3(4, reg_lam=0.01)``, receives the
    training and test splits, runs its own preprocessing and weight
    initialisation, and finally calls ``visualize()``.
    """
    from load_data import load_CIFAR10

    # Original author's note on the data layout: 3072 x 50000.
    dataset = load_CIFAR10('../data/cifar10/')
    X_train, Y_train, X_test, Y_test = dataset

    network = NN3(4, reg_lam=0.01)
    network.load_examples(X_train, Y_train, axis=1)
    network.load_test_data(X_test, Y_test)
    network.data_preprocessing()
    network.weight_init()
    network.visualize()
示例#3
0
def test_tfrecords():
	"""Read one batch from ``train.tfrecords`` and print it next to the pickled data.

	A queue-based TF1 input pipeline decodes PNG images, crops/pads them to
	32x32 and batches 128 of them. One batch is fetched, printed and passed
	through ``data_preprocess``. Afterwards the first sample returned by
	``load_CIFAR10`` is printed as well.
	"""
	tfrecord_path = 'train.tfrecords'
	record_reader = tf.TFRecordReader()
	path_queue = tf.train.string_input_producer([tfrecord_path], num_epochs=None)
	_, raw_example = record_reader.read(path_queue)

	# Every record carries an int64 label and the encoded image bytes.
	feature_spec = {
		'label': tf.FixedLenFeature([], tf.int64),
		'image': tf.FixedLenFeature([], tf.string),
	}
	parsed = tf.parse_single_example(raw_example, features=feature_spec)

	decoded = tf.image.decode_png(parsed['image'], channels=3)
	decoded = tf.image.resize_image_with_crop_or_pad(decoded, 32, 32)
	label = tf.cast(parsed['label'], tf.int32)

	batch_size = 128
	image_batch, label_batch = tf.train.batch(
		[decoded, label],
		batch_size=batch_size,
		num_threads=1,
		capacity=10 * batch_size,
	)
	image_batch = tf.cast(image_batch, tf.float32)

	with tf.Session() as sess:
		# Queue runners must be started before the batch can be fetched.
		coord = tf.train.Coordinator()
		threads = tf.train.start_queue_runners(sess=sess, coord=coord)
		batch_images, batch_labels = sess.run([image_batch, label_batch])
		print('rrrrrr')
		for value in (batch_images.shape, batch_labels.shape,
				batch_images, batch_labels):
			print(value)
		processed = data_preprocess(batch_images)
		print(processed.shape)
		coord.request_stop()
		coord.join(threads)

	# Print the first sample from the pickled CIFAR-10 batches for comparison.
	cifar10_dir = 'cifar-10-batches-py'
	X_train, Y_train, X_test, Y_test = load_CIFAR10(cifar10_dir)
	print(X_train[0])
	print(Y_train[0])
def main(model_name):
	"""Train the model selected by ``model_name`` and save its checkpoint.

	Args:
		model_name: ``"lenet"`` or ``"vgg19"``. Any other value prints a
			message and returns without training or saving.

	Returns:
		None.
	"""
	cifar10_dir = 'cifar-10-batches-py'
	X_train, Y_train, X_test, Y_test = load_CIFAR10(cifar10_dir)

	# Only the test split is preprocessed here (train=False).
	X_test = data_preprocess(X_test, train=False, model=model_name)
	print(X_train.shape)
	print(X_test.shape)
	# return ;
	
	# The training arrays loaded above are replaced by whatever `input` returns.
	# NOTE(review): `input` is presumably a project-defined pipeline function
	# that shadows the builtin (the builtin takes at most one argument) - confirm.
	X_train, Y_train = input('train', 128)
	with tf.Session() as sess:
		# NOTE(review): the coordinator is never asked to stop and the
		# threads are never joined anywhere in this function.
		coord = tf.train.Coordinator()
		threads = tf.train.start_queue_runners(sess=sess, coord=coord)

		# Checkpoint file, and the directory whose existence decides
		# between restoring and initialising the variables.
		parameter_path = "checkpoint_" + model_name + "/variable.ckpt"
		path_exists = "checkpoint_" + model_name

		if model_name == "lenet":
			print('begin to train lenet model')
			model = Model_Lenet()
		elif model_name == "vgg19":
			print('begin to train vgg19 model')
			model = Model_Vgg19()
		else:
			# Unknown model: leave while the queue runners started above are still running.
			print('we do not have this model')
			return ;

		# The Saver is created after the model has been constructed.
		saver = tf.train.Saver()
		if os.path.exists(path_exists):
			saver.restore(sess, parameter_path)
			print('loaded the weight')
		else:
			sess.run(tf.global_variables_initializer())
			print('init all the weight')

		# `train` is not used afterwards; presumably constructing Trainer
		# runs the training - TODO confirm.
		train = Trainer(model, sess, X_train, Y_train, X_test, Y_test, model_name)
		save_path = saver.save(sess, parameter_path)
示例#5
0
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # increment by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic))
        print('numerical: %f analytic: %f, relative error: %e' %
              (grad_numerical, grad_analytic, rel_error))


# Script entry point: load CIFAR-10 and carve out validation, training and
# development subsets by indexing the loaded training arrays.
if __name__ == "__main__":

    X_train, y_train, X_test, y_test = load_CIFAR10("cifar-10-batches-py")
    # Subset sizes. num_test is not used in the visible part of this script.
    num_training = 49000
    num_validation = 1000
    num_test = 1000
    num_dev = 500

    # Validation set: the num_validation samples right after the first
    # num_training ones.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]

    # Training set: the first num_training samples.
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    # Development set: num_dev training samples drawn without replacement.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
示例#6
0

# To use the code above we need a function that takes a single parameter
# (here the parameter is the weights), so X_train and Y_train are bundled
# into ``data`` as well.
def CIFAR10_loss_fun(data, weights):
    """Return ``L_SVM`` evaluated on the bundled data and ``weights``.

    ``data`` is ``[X_train, Y_train]``; only its first two items are read.
    """
    from Lecture3.Loss import L_SVM

    features = data[0]
    labels = data[1]
    return L_SVM(features, labels, weights)


# Script entry point: evaluate a numerical gradient of the CIFAR-10 loss on a
# random batch and try a range of step sizes along the negative gradient.
if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data

    # Keep only the first two items returned by the loader (training data and labels).
    X_train, Y_train = load_CIFAR10('../data/cifar10/')[0:2]
    # Append a column of ones to X_train.
    # NOTE(review): presumably the bias trick, matching the 3073 columns of W - confirm.
    X_train = np.append(X_train, np.ones((X_train.shape[0], 1)), axis=1)
    data_train = [X_train, Y_train]
    data_batch = sample_training_data(data_train, 256)  # 256 samples
    W = np.random.rand(10, 3073) * 0.001  # random initial weights
    op = Optimization()
    df, loss_original = op.eval_numerical_gradient(CIFAR10_loss_fun,
                                                   data_batch, W)  # gradient and initial loss

    print('original loss: %f' % (loss_original, ))
    min_loss = loss_original
    # Look at the effect of different step sizes.
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]:
        step_size = 10**step_size_log

        W_new = W - step_size * df  # new position in weight space
示例#7
0
        best_model['lr'] = lr
        return best_model


def save_weights(model):
    """Save the weights, biases and learning rate of ``model`` as ``.npy`` files.

    Five files are written to the current working directory, all prefixed
    with the same timestamp: ``<stamp>_hidden_W.npy``, ``<stamp>_hidden_b.npy``,
    ``<stamp>_out_W.npy``, ``<stamp>_out_b.npy`` and ``<stamp>_lr.npy``.

    Args:
        model: mapping with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``
            and ``'lr'``.

    Raises:
        KeyError: if one of the expected keys is missing from ``model``.
    """
    # Take the timestamp once. Calling datetime.now() per file gave every
    # file of the same save a different prefix, so they could not be matched.
    stamp = str(datetime.now())
    targets = (
        ("_hidden_W.npy", 'W1'),
        ("_hidden_b.npy", 'b1'),
        ("_out_W.npy", 'W2'),
        ("_out_b.npy", 'b2'),
        ("_lr.npy", 'lr'),
    )
    for suffix, key in targets:
        np.save(stamp + suffix, model[key])


# Script entry point: standardise CIFAR-10 with training-set statistics and
# prepare a tiny 20-sample subset for a first training run.
if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data

    # Original author's note on the data layout: 3072 x 50000.
    X_train, Y_train, X_test, Y_test = load_CIFAR10('../data/cifar10/')
    # Mean and standard deviation along axis 1, reshaped to column vectors
    # so they broadcast against the data in the in-place operations below.
    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))

    X_train -= mean_train  # zero-centering: subtract the mean
    X_train /= std_train  # normalisation: divide every dimension by its standard deviation
    # The test data is transformed with the training-set statistics.
    X_test -= mean_train
    X_test /= std_train
    # A stray `np.linalg.norm()` call used to follow here. Called without
    # arguments it raises TypeError and aborted the script, so it was removed.

    trainer = ClassifierTrainer()
    node_num = [3072, 100, 10]
    # First 20 columns of the data and of the labels.
    x_tiny = X_train[:, :20]
    y_tiny = Y_train[:, :20]
    first_model = trainer.train(x_tiny, y_tiny, x_tiny, y_tiny, node_num,
                                num_epochs = 150, reg = 0.01, update = 'sgd',
示例#8
0
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

# 文件的读取,我们直接通过给定的`load_CIFAR10`模块读取数据。
# 感谢这个magic函数,你不必要担心如何写读取的过程。如果想了解细节,可以参考此文件。
from load_data import load_CIFAR10

import os
import numpy as np
import matplotlib.pyplot as plt

# Path of the data folder: please do not change it, otherwise the submitted
# model will not be able to run.
cifar10_dir = os.path.join(os.path.dirname(__file__),
                           '../../data/cifar-10-batches-py')

# Read the files and keep the data as a training set and a test set.
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# First check the size of every variable to make sure nothing went wrong.
# X_train and X_test are expected to have shape N*W*H*3.
# N: number of samples, W: sample width, H: sample height, 3: RGB colors.
# y_train and y_test hold the image labels.
print("训练数据和测试数据:", X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print("标签的种类: ", np.unique(y_train))  # show the distinct labels; 10 classes are expected
        print(num_test)
        # loop over all test rows
        for i in range(num_test):
            print(str(i) + ".")
            # find the nearest training image to the i'th test image

            # using the L1 distance (sum of absolute value differences)
            # distances = np.sum(np.abs(self.Xtr - X[i, :]), axis = 1)

            # using the L2 distance (computing the euclidean distance between two vectors)
            distances = np.sqrt(np.sum(np.square(self.Xtr - X[i, :]), axis = 1))

            min_index = np.argmin(distances)  # get the index with smallest distance
            Ypred[i] = self.ytr[min_index]  # predict the label of the nearest example

        return Ypred

# Script entry point: fit the nearest-neighbor classifier on CIFAR-10 and
# report its accuracy on the test images.
if __name__ == '__main__':

    # load_CIFAR10 is the loader provided with the data.
    Xtr, Ytr, Xte, Yte = load_CIFAR10('../data/cifar10/')

    # Flatten every image into one row of 32 * 32 * 3 values
    # (Xtr_rows becomes 50000 x 3072, Xte_rows becomes 10000 x 3072).
    values_per_image = 32 * 32 * 3
    Xtr_rows = Xtr.reshape(Xtr.shape[0], values_per_image)
    Xte_rows = Xte.reshape(Xte.shape[0], values_per_image)

    classifier = NearestNeighbor()
    classifier.train(Xtr_rows, Ytr)
    Yte_predict = classifier.predict(Xte_rows)

    # Accuracy is the fraction of test examples whose predicted label
    # matches the true label.
    accuracy = np.mean(Yte_predict == Yte)
    print('accuracy: %f' % (accuracy))
示例#10
0
def CIFAR10_test():
    """Train and evaluate an ``NN`` on CIFAR-10, then optionally save it.

    Steps, in order:
      1. standardise the data with training-set statistics;
      2. try step sizes 1e-10 .. 1 on copies of the network and keep the one
         giving the lowest loss after a single update;
      3. check that the network can overfit a 16-sample batch;
      4. train on up to 1000 random batches of 256 samples;
      5. report the loss and "correct ratio" on the test data;
      6. ask on stdin whether to save the network.

    Raises:
        Exception: if the correct ratio on the 16-sample sanity batch stays
            below 0.99, which suggests back-propagation is not working.
    """
    from load_data import load_CIFAR10, sample_training_data

    X_train, Y_train, X_test, Y_test = load_CIFAR10(
        '../data/cifar10/')  # 3072 x 50000

    # --- Data preprocessing ---
    # Statistics along axis 1 as column vectors, so they broadcast against the
    # data in the in-place operations below.
    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))

    X_train -= mean_train  # zero-centering: subtract the mean
    X_train /= std_train  # normalisation: divide every dimension by its std
    X_test -= mean_train
    X_test /= std_train

    # --- Network initialisation ---
    data_batch = sample_training_data([X_train, Y_train],
                                      256)  # 256 samples, 3072 x 256
    in_num = 3072
    hidden_num = 100
    out_num = 10
    nn = NN(data_batch[0], in_num, hidden_num, out_num, data_batch[1], 10**-3,
            0.01)
    loss_original = nn.loss
    print("original loss: %f" % (loss_original, ))

    # --- Choose a suitable step size ---
    min_loss = loss_original
    # Stays None when no trial beats the original loss. The original code left
    # the name unbound in that case and crashed on the print below.
    best_step_size = None
    for step_size_log in range(-10, 1):
        step_size = 10**step_size_log
        # Work on a copy so the real network is not updated by the trial.
        trial = copy.deepcopy(nn)
        trial.hidden_layer.step_size = step_size
        trial.output_layer.step_size = step_size

        dh = trial.output_layer.backward(trial.gradient)
        trial.hidden_layer.backward(dh)

        loss_new = trial.forward()
        print("step_size: %.10f, loss: %f" % (
            step_size,
            loss_new,
        ))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size

    if best_step_size is None:
        print("no step size improved the loss; keeping the current step size")
    else:
        print("best step size %.10f" % (best_step_size, ))
        nn.hidden_layer.step_size = best_step_size
        nn.output_layer.step_size = best_step_size
    time.sleep(1)

    # --- Sanity check before training ---
    # Take a small batch and see whether the network can overfit it, to judge
    # whether back-propagation works.
    batch_size = 16
    data_batch = sample_training_data([X_train, Y_train], batch_size)
    for i in range(300):
        nn.forward(data_batch)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.9) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (
            i,
            loss,
            correct,
        ))
        if loss < 0.00001 or correct >= 0.99:
            break
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        time.sleep(0.1)
    print("loss: %f, correct ratio: %f" % (
        loss,
        correct,
    ))
    if correct < 0.99:
        raise Exception("BP does not work correctly")

    # --- Training ---
    for i in range(1000):
        batch_size = 256
        data_batch = sample_training_data([X_train, Y_train], batch_size)
        nn.forward(data_batch)
        # h_weights_grad = nn.eval_numerical_gradient(nn.hidden_layer)
        # o_weights_grad = nn.eval_numerical_gradient(nn.output_layer)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.5) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (
            i,
            loss,
            correct,
        ))
        if loss < 0.00001:
            break

        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        # time.sleep(0.1)

    # Training results recorded by the original author
    # (iteration, loss, correct ratio):
    #   i: 0 , loss: 5.286373, correct ratio: 0.027344
    #   i: 1 , loss: 5.164099, correct ratio: 0.015625
    #   i: 2 , loss: 5.090839, correct ratio: 0.015625
    #   i: 3 , loss: 4.836606, correct ratio: 0.035156
    #   i: 4 , loss: 4.861015, correct ratio: 0.031250
    #   i: 5 , loss: 4.918304, correct ratio: 0.019531
    #   i: 6 , loss: 4.632134, correct ratio: 0.015625
    #   ...
    #   i: 991 , loss: 3.045871, correct ratio: 0.187500
    #   i: 992 , loss: 3.044970, correct ratio: 0.207031
    #   i: 993 , loss: 2.993322, correct ratio: 0.257812
    #   i: 994 , loss: 3.028033, correct ratio: 0.187500
    #   i: 995 , loss: 2.953715, correct ratio: 0.234375
    #   i: 996 , loss: 2.930693, correct ratio: 0.234375
    #   i: 997 , loss: 3.072322, correct ratio: 0.183594
    #   i: 998 , loss: 3.051221, correct ratio: 0.191406
    #   i: 999 , loss: 2.898737, correct ratio: 0.207031
    print("loss: %f" % (loss, ))

    # --- Evaluate on the test data ---
    data_test = [X_test, Y_test]
    nn.forward(data_test)
    loss = nn.loss
    # Samples lie along axis 1 (the broadcasting above requires it), so
    # normalise by the number of test samples. The original divided by the
    # training batch size of 256.
    num_test_samples = X_test.shape[1]
    print("test loss: %f, correct ratio: %f" % (
        loss,
        np.sum(nn.probability > 0.5) / num_test_samples,
    ))
    # Test result recorded by the original author, computed with the old
    # divisor of 256: test loss: 2.840334, correct ratio: 0.222656

    # --- Save the network ---
    select = input("save weights and bias ? (y or n)")
    # Compare by value; `is` on a string literal tests object identity.
    if select == "y":
        save_weights(nn)
        print("save successfully")