def load_data():
    """
    Load the data, including the training and test sets.

    Args:
    Return:
        X_train: training data set
        Y_train: training data labels
        X_test: test data set
        Y_test: test data labels
        classes (cat/non-cat): list of classes
        px_num: image side length in pixels
    """
    # Fetch the data via utils.load_data_sets()
    X_train, Y_train, X_test, Y_test, classes = utils.load_data_sets()

    # Gather basic information about the data
    train_num = X_train.shape[0]  # train_num: number of training samples
    test_num = X_test.shape[0]    # test_num: number of test samples
    px_num = X_train.shape[1]     # px_num: side length of each image, in pixels

    # Reshape the data
    data_dim = px_num * px_num * 3
    X_train = X_train.reshape(train_num, data_dim).T
    X_test = X_test.reshape(test_num, data_dim).T

    # Normalize the data
    X_train = X_train / 255.
    X_test = X_test / 255.

    data = [X_train, Y_train, X_test, Y_test, classes, px_num]

    return data
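A minimal call-site sketch for the loader above. The shape comments assume the usual 64x64 RGB cat/non-cat images (so data_dim = 64 * 64 * 3 = 12288), which matches the reshape and transpose inside load_data():

# Hypothetical usage; shape comments assume 64x64 RGB images.
X_train, Y_train, X_test, Y_test, classes, px_num = load_data()
print(X_train.shape)  # (12288, train_num): one column per training example
print(X_test.shape)   # (12288, test_num)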
def load_data():
    """
    Load the data. The items are:
        train_set_x_orig: original training data set
        train_set_y: original training data labels
        test_set_x_orig: original test data set
        test_set_y: original test data labels
        classes (cat/non-cat): list of classes

    Args:
    Return:
    """
    global TEST_SET, DATA_DIM, CLASSES, TRAINING_SET

    train_x_ori, train_y, test_x_ori, test_y, classes = \
        load_data_sets()

    m_test = test_x_ori.shape[0]
    m_train = train_x_ori.shape[0]
    num_px = train_x_ori.shape[1]

    # Define the data dimension
    DATA_DIM = num_px * num_px * 3

    # Flatten the data
    train_set_x_flatten = train_x_ori.reshape(m_train, -1)
    test_x_flatten = test_x_ori.reshape(m_test, -1)

    # Normalize the data
    train_set_x = train_set_x_flatten / 255.
    test_x = test_x_flatten / 255.

    TRAINING_SET = np.hstack((train_set_x, train_y.T))
    TEST_SET = np.hstack((test_x, test_y.T))
    CLASSES = classes
def main():
    """
    Program entry point.
    """
    # Load the data
    train_X, train_Y, test_X, test_Y = utils.load_data_sets()

    print("1. show the data set")
    plt.scatter(test_X.T[:, 0], test_X.T[:, 1], c=test_Y, s=40,
                cmap=plt.cm.Spectral)
    plt.title("show the data set")
    plt.show()

    print("2. begin training")
    # Train the model
    parameters = train(train_X, train_Y, n_h=10, num_iterations=10000,
                       print_cost=True)

    # Predict on the training set
    predictions = predict(parameters, train_X)
    # Print the accuracy
    print('Train Accuracy: %d' % float(
        (np.dot(train_Y, predictions.T) + np.dot(1 - train_Y, 1 - predictions.T))
        / float(train_Y.size) * 100) + '%')

    # Predict on the test set
    predictions = predict(parameters, test_X)
    print('Test Accuracy: %d' % float(
        (np.dot(test_Y, predictions.T) + np.dot(1 - test_Y, 1 - predictions.T))
        / float(test_Y.size) * 100) + '%')

    # Plot the decision boundary
    print("3. output the decision boundary")
    utils.plot_decision_boundary(lambda x: predict(parameters, x.T),
                                 train_X, train_Y)
    plt.title("Decision Boundary for hidden layer size " + str(10))
    plt.show()
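A tiny numeric illustration (with made-up labels) of the accuracy expression used above: np.dot(Y, P.T) counts the examples where both label and prediction are 1, and np.dot(1 - Y, 1 - P.T) counts those where both are 0, so their sum is the number of correct predictions:

import numpy as np

Y = np.array([[1, 0, 1, 1]])  # true labels (made up)
P = np.array([[1, 0, 0, 1]])  # predictions (made up)
correct = np.dot(Y, P.T) + np.dot(1 - Y, 1 - P.T)  # array([[3]])
print(float(correct[0][0]) / Y.size * 100)  # 75.0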
def main():
    """
    Show some images in the train dataset.
    """
    train_x_ori, train_y, test_x_ori, test_y, classes = \
        utils.load_data_sets()

    print("1. load data from the dataset")
    print("there are " + str(train_y.shape[1]) + " train data and "
          + str(test_y.shape[1]) + " test data")
    print("train set categories are " + str(classes))

    print("2. show train set labels, 0 means not cat, 1 means cat")
    print(train_y)

    print("3. show image(2) with label 1, it should be a cat image")
    index = 2
    plt.imshow(train_x_ori[index])
    plt.pause(30)

    print("4. show image(1) with label 0, it should not be a cat image")
    index = 1
    plt.imshow(train_x_ori[index])
    plt.pause(30)

    print("5. show test set labels, 0 means not cat, 1 means cat")
    print(test_y)

    print("6. show image(1) with label 1, it should be a cat image")
    index = 1
    plt.imshow(test_x_ori[index])
    plt.pause(30)
def load_data():
    """
    Load the data, including the training and test sets.

    Args:
    Return:
        X_train: original training data set
        Y_train: original training data labels
        X_test: original test data set
        Y_test: original test data labels
        classes (cat/non-cat): list of classes
        px_num: image side length in pixels
    """
    X_train, Y_train, X_test, Y_test, classes = utils.load_data_sets()

    train_num = X_train.shape[0]
    test_num = X_test.shape[0]
    px_num = X_train.shape[1]

    data_dim = px_num * px_num * 3
    X_train = X_train.reshape(train_num, data_dim).T
    X_test = X_test.reshape(test_num, data_dim).T

    X_train = X_train / 255.
    X_test = X_test / 255.

    data = [X_train, Y_train, X_test, Y_test, classes, px_num]

    return data
def load_data():
    """
    Load the data. The items are:
        train_set_x_orig: original training data set
        train_set_y: original training data labels
        test_set_x_orig: original test data set
        test_set_y: original test data labels
        classes (cat/non-cat): list of classes

    Args:
    Return:
    """
    global TRAINING_SET, TEST_SET, DATA_DIM

    train_x_ori, train_y, test_x_ori, test_y, classes = \
        utils.load_data_sets()

    m_train = train_x_ori.shape[0]
    m_test = test_x_ori.shape[0]
    num_px = train_x_ori.shape[1]

    # Define the data dimension
    DATA_DIM = num_px * num_px * 3

    # Flatten the data; note that, for convenience, the .T transpose is not applied here
    train_x_flatten = train_x_ori.reshape(m_train, -1)
    test_x_flatten = test_x_ori.reshape(m_test, -1)

    # Normalize
    train_x = train_x_flatten / 255.
    test_x = test_x_flatten / 255.

    TRAINING_SET = np.hstack((train_x, train_y.T))
    TEST_SET = np.hstack((test_x, test_y.T))
def main():
    """
    main entry
    """
    train_X, train_Y, test_X, test_Y, classes = utils.load_data_sets()

    # Gather basic information about the data
    train_num = train_X.shape[0]
    test_num = test_X.shape[0]
    # In this example num_px = 64
    px_num = train_X.shape[1]

    # Reshape the data
    data_dim = px_num * px_num * 3
    train_X = train_X.reshape(train_num, data_dim).T
    test_X = test_X.reshape(test_num, data_dim).T

    train_X = train_X / 255.
    test_X = test_X / 255.

    layer = [12288, 20, 7, 5, 1]

    parameters = utils.deep_neural_network(train_X, train_Y, layer, 2500)
    print('Train Accuracy:', utils.predict_image(parameters, train_X, train_Y), '%')
    print('Test Accuracy:', utils.predict_image(parameters, test_X, test_Y), '%')
def run_logreg_analyses():
    "Runs the logistic regression analyses and saves the Stan samples to disk"
    data_sets = utils.load_data_sets()
    for data_set_name, data in data_sets:
        for analysis_name, get_trial_pairs in LOGREG_ANALYSES[data_set_name]:
            if not exists(
                    get_logreg_results_filename(data_set_name, analysis_name) + '.txt'):
                run_logreg_model(data_set_name, data, get_trial_pairs, analysis_name)
def main():
    """
    main entry
    """
    train_x, train_y, test_x, test_y = utils.load_data_sets()

    layer = [2, 4, 1]
    parameters = neural_network(train_x, train_y, layer, 10000)

    print('train:', predict_result(parameters, train_x, train_y))
    print('test:', predict_result(parameters, test_x, test_y))
def main():
    images_dir = 'svnh_test_images'
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    utils.download_train_and_test_data()
    _, testset = utils.load_data_sets()

    idx = np.random.randint(0, testset['X'].shape[3], size=64)
    test_images = testset['X'][:, :, :, idx]
    test_images = np.rollaxis(test_images, 3)

    for ii in range(len(test_images)):
        scipy.misc.toimage(test_images[ii]).save("{}/image_{}.jpg".format(images_dir, ii))
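A small synthetic illustration of the np.rollaxis call above, which moves the sample axis of the SVHN array from last to first so the images can be iterated one by one; the 32x32x3 image shape is an assumption based on the shape[3] indexing in the snippet:

import numpy as np

# Synthetic stand-in for testset['X']: 32x32 RGB images stacked along the last axis.
fake = np.zeros((32, 32, 3, 64))
rolled = np.rollaxis(fake, 3)
assert rolled.shape == (64, 32, 32, 3)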
def main():
    X_train, y_train, X_test, y_test = load_data_sets(
        'data/X_train.npy',
        'data/y_train.npy',
        'data/X_test.npy',
        'data/y_test.npy'
    )
    datagen = augment_data(X_train)
    batch_size = 50
    model = train_model(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        (X_test, y_test),
        len(X_train) // batch_size
    )
    model.save('models/keras_cnn')
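A note on the step count above: floor division keeps the steps-per-epoch value integral, which Keras-style fit generators expect. A minimal check with hypothetical sizes:

# Hypothetical sizes: 5050 samples in batches of 50 give 101 full steps per epoch.
assert 5050 // 50 == 101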
def load_test_images():
    '''
    Loads 64 random images from the SVHN test data set.

    :return: Tuple of (test images, image labels)
    '''
    utils.download_train_and_test_data()
    _, testset = utils.load_data_sets()

    idx = np.random.randint(0, testset['X'].shape[3], size=64)
    test_images = testset['X'][:, :, :, idx]
    test_labels = testset['y'][idx]

    test_images = np.rollaxis(test_images, 3)
    test_images = utils.scale(test_images)

    return test_images, test_labels
def main():
    "Runs the model fitting for all data sets"
    hybrid_model = utils.get_stan_model(
        join(utils.MODELS_DIR, 'hybrid.stan'),
        join(utils.MODELS_DIR, 'hybrid.bin'))
    mb_model = utils.get_stan_model(
        join(utils.MODELS_DIR, 'model_based.stan'),
        join(utils.MODELS_DIR, 'model_based.bin'))
    if not exists(utils.MODEL_RESULTS_DIR):
        mkdir(utils.MODEL_RESULTS_DIR)
    for dsname, data_set in utils.load_data_sets():
        dsname = dsname.replace(' ', '_').lower()
        # Eliminate slow trials
        data_set = data_set[data_set.slow == 0]
        model_dat = {
            's1': [],
            'a1': [],
            'a2': [],
            's2': [],
            'reward': [],
            'N': len(data_set.participant.unique()),
            'num_trials': [
                len(data_set[data_set.participant == part])
                for part in data_set.participant.unique()],
        }
        model_dat['T'] = max(model_dat['num_trials'])
        for i, part in enumerate(data_set.participant.unique()):
            part_info = data_set[data_set.participant == part]
            assert len(part_info) == model_dat['num_trials'][i]
            fill = [1] * (model_dat['T'] - len(part_info))  # Dummy data to fill the array
            model_dat['s1'].append(list(part_info.init_state) + fill)
            model_dat['s2'].append(list(part_info.final_state) + fill)
            model_dat['a1'].append(list(part_info.choice1) + fill)
            model_dat['a2'].append(list(part_info.choice2) + fill)
            model_dat['reward'].append(list(part_info.reward) + fill)
        for model_name, stan_model in (('hybrid', hybrid_model),
                                       ('model_based', mb_model)):
            flnm = f'{dsname}_{model_name}'
            results_flnm = join(utils.MODEL_RESULTS_DIR, flnm + '.txt')
            # Do not rerun the model if results already exist
            if not exists(results_flnm):
                fit = stan_model.sampling(
                    data=model_dat, iter=ITER, chains=CHAINS, warmup=WARMUP,
                    sample_file=join(utils.MODEL_RESULTS_DIR, flnm + '.csv'),
                    refresh=10)
                with open(results_flnm, 'w') as fit_results_file:
                    fit_results_file.write(str(fit))
def load_data():
    """
    Load the data. The items are:
        train_set_x: original training data set
        train_set_y: original training data labels
        test_set_x: original test data set
        test_set_y: original test data labels

    Args:
    Return:
    """
    global TRAINING_SET, TEST_SET, DATA_DIM

    train_set_x, train_set_y, test_set_x, test_set_y = utils.load_data_sets()

    # Define the data dimension
    DATA_DIM = 2

    TRAINING_SET = np.hstack((train_set_x.T, train_set_y.T))
    TEST_SET = np.hstack((test_set_x.T, test_set_y.T))
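A hedged sketch of how the stacked TRAINING_SET above might be split back into features and labels; split_features_labels is a hypothetical helper, not part of the original module:

# Hypothetical helper: undo the hstack above.
# The first DATA_DIM columns are features, the remainder the label.
def split_features_labels(data_set):
    X = data_set[:, :DATA_DIM]
    y = data_set[:, DATA_DIM:]
    return X, y

X, y = split_features_labels(TRAINING_SET)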
def main():
    # Preparations
    create_checkpoints_dir()
    utils.download_train_and_test_data()
    trainset, testset = utils.load_data_sets()

    # Create the real input for the GAN model (its discriminator) and
    # the GAN model itself
    real_size = (32, 32, 3)
    z_size = 100
    learning_rate = 0.0003
    tf.reset_default_graph()
    input_real = tf.placeholder(tf.float32, (None, *real_size), name='input_real')
    net = GAN(input_real, z_size, learning_rate)

    # Create the dataset
    dataset = Dataset(trainset, testset)

    # Train the model
    batch_size = 128
    epochs = 25
    _, _, _ = train(net, dataset, epochs, batch_size, z_size)
def main():
    """
    Show the data set and the result of logistic regression.
    """
    np.random.seed(1)

    # Load the data
    train_X, train_Y, test_X, test_Y = utils.load_data_sets()

    print("1. show the data set")
    plt.scatter(test_X.T[:, 0], test_X.T[:, 1], c=test_Y, s=40,
                cmap=plt.cm.Spectral)
    plt.title("show the data set")
    plt.show()

    shape_X = train_X.shape
    shape_Y = train_Y.shape
    m = train_Y.shape[1]

    print('The shape of X is: ' + str(shape_X))
    print('The shape of Y is: ' + str(shape_Y))
    print('I have m = %d training examples!' % m)

    clf = sklearn.linear_model.LogisticRegressionCV()
    clf.fit(train_X.T, train_Y.T)

    print("2. show the result of logistic classification")
    utils.plot_decision_boundary(lambda x: clf.predict(x), train_X, train_Y)
    plt.title("Logistic Regression")
    plt.show()

    lr_predictions = clf.predict(train_X.T)
    print('Accuracy of logistic regression: %d ' % float(
        (np.dot(train_Y, lr_predictions) + np.dot(1 - train_Y, 1 - lr_predictions))
        / float(train_Y.size) * 100)
        + '% ' + "(percentage of correctly labelled datapoints)")
# 256 for the first try; try x2 or x0.5
batch_size = 32  # 128

# Network parameters
output_dim = 185

# Display frequency (prints per #batches)
display_step = 400

logdir = "train_logs"
if os.path.exists(logdir):
    shutil.rmtree(logdir)
os.makedirs(logdir)

# Main part of training

# Load train & validation data
data_family = load_data_sets(image_size[0], output_dim, batch_size,
                             train_list, val_list)

# TF placeholder for graph input and output
y = tf.placeholder(tf.float32, [None, output_dim], name='data_label')

# Initialize the model
model = ResNet([224, 224, 3], output_dim, basic_block, [3, 4, 6, 3])
predicts = model.predicts
x, training = model.images, model.training

with tf.name_scope("Asymmetric_L2_Loss"):
    obj_loss = asym_l2_loss(predicts, y)
    reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = obj_loss + tf.add_n(reg_loss)
tf.summary.scalar('obj_loss', obj_loss)  # spaces are illegal in TF summary names

lr_rate = 0.001
var_list = [
    v for v in tf.trainable_variables()
    if any(v.name in s for s in train_vars)
]

with tf.name_scope('train'):
    optm = tf.train.MomentumOptimizer(learning_rate=lr_rate, momentum=0.9)
    grads_vars = optm.compute_gradients(loss, var_list)
    train_op = optm.apply_gradients(grads_and_vars=grads_vars)

# Do not restore the final layers; they are trained from scratch for this task
exclude = [
    'resnet_v1_50/logits/weights', 'resnet_v1_50/logits/biases',
    'resnet_v1_50/fc6/weights', 'resnet_v1_50/fc6/biases'
]
vars_restore = slim.get_variables_to_restore(exclude=exclude)
restorer = tf.train.Saver(var_list=vars_restore)
saver = tf.train.Saver(max_to_keep=10)

feed_dict = OrderedDict.fromkeys([inputs, truths, training])
group_op = tf.group(train_op)

data_family = load_data_sets(output_dims, batch_size, train_list,
                             val_list, base_dir)

# Begin training
train(group_op, loss, feed_dict, data_family, num_epochs, saver,
      restorer, model_path)