# X_train[ i ].dtype: float32
# X_train[ i ].min(): 0.0
# X_train[ i ].max(): 1.0
# X_train[ i ].shape = (HEIGHT*WIDTH,): (reshape=True),  (1024,)
#
# type(Y_train):  <class 'numpy.ndarray'>
# Y_train.dtype: float32
# Y_train.shape: (one_hot=False),  (60000,)
#
# type(Y_train[ i ]): <class 'numpy.float32'>
# Y_train[ i ].dtype: float32
# Y_train[ i ]: (one_hot=False),  0...9
print('Reading train dataset (Train 60000.cdb)...')
# Load the Hoda training split: each image flattened to a 1024-vector
# (reshape=True) with plain integer-style labels (one_hot=False), as
# documented in the dtype/shape notes above.
X_train, Y_train = read_hoda_dataset(
    dataset_path='./DigitDB/Train 60000.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True,
)


# type(X_test):  <class 'numpy.ndarray'>
# X_test.dtype: float32
# X_test.shape: (reshape=False),  (20000, 32, 32, 1)
#
# type(X_test[ i ]): <class 'numpy.ndarray'>
# X_test[ i ].dtype: float32
# X_test[ i ].min(): 0.0
# X_test[ i ].max(): 1.0
# X_test[ i ].shape = (HEIGHT, WIDTH, CHANNEL): (reshape=False),  (32, 32, 1)
#
# type(Y_test):  <class 'numpy.ndarray'>
# ----- Example #2 (scraped snippet boundary; "示例" means "Example") -----
import keras
import keras_metrics
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

# Training hyperparameters for this dense-network example.
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets

print('Reading train dataset (Train 60000.cdb)...')
x_train, y_train = read_hoda_dataset(
    dataset_path='./DigitDB/Train 60000.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True,
)

# NOTE(review): the test labels are one-hot encoded (one_hot=True) while the
# train labels are not — confirm the training labels are converted (e.g. via
# to_categorical) before fitting, or the loss will see mismatched encodings.
print('Reading test dataset (Test 20000.cdb)...')
x_test, y_test = read_hoda_dataset(
    dataset_path='./DigitDB/Test 20000.cdb',
    images_height=32,
    images_width=32,
    one_hot=True,
    reshape=True,
)


# Flatten each 32x32 image into a 1024-element vector for the dense network.
x_train = x_train.reshape(60000, 1024)
x_test = x_test.reshape(20000, 1024)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# BUG FIX: the original MNIST-style `x_train /= 255` was dropped.
# read_hoda_dataset already returns float32 pixels scaled to [0.0, 1.0]
# (see the dtype/min/max notes at the top of this file), so dividing by 255
# again squashed all values into [0, 1/255] and crippled training.
# ----- Example #3 (scraped snippet boundary) -----
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import Adam
from keras.utils import np_utils
import numpy as np

# Fix the RNG seed so runs are reproducible.
np.random.seed(1671)

# Visual separator in the console output.
print(
    '################################################################################'
)
print()

# Load the Hoda train split as flat vectors with integer labels.
# NOTE(review): images are requested at 28x28 here while other snippets in
# this file use 32x32 — confirm the reader resizes as intended.
print('Reading train 60000.cdb ...')
X_train, Y_train = read_hoda_dataset(dataset_path='./DigitDB/Train 60000.cdb',
                                     images_height=28,
                                     images_width=28,
                                     one_hot=False,
                                     reshape=True)

print('Reading test 20000.cdb ...')
X_test, Y_test = read_hoda_dataset(dataset_path='./DigitDB/Test 20000.cdb',
                                   images_height=28,
                                   images_width=28,
                                   one_hot=False,
                                   reshape=True)

print(
    '################################################################################'
)
print()
# BUG FIX: the original message had an unbalanced parenthesis
# ("...(Simple Deep Learning"); the closing ")" is restored.
print('Begin Deep Learning Process (Simple Deep Learning)')
# ----- Example #4 (scraped snippet boundary) -----
# import the necessary packages
from HodaDatasetReader import read_hoda_cdb, read_hoda_dataset
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras import backend as K
from keras.optimizers import Adam
from keras.utils import np_utils
import numpy as np
import cv2

# Load all three Hoda splits. No height/width/one_hot/reshape arguments are
# passed, so the reader's defaults apply — presumably 32x32 unreshaped
# images, matching the reshapes below; confirm against HodaDatasetReader.
print("Reading DataSets...")
trainData, trainLabels = read_hoda_dataset('./DigitDB/Train 60000.cdb')
validationData, validationLabels = read_hoda_dataset(
    './DigitDB/RemainingSamples.cdb')
testData, testLabels = read_hoda_dataset('./DigitDB/Test 20000.cdb')

# handle matrix for when Keras is using "channels first" ordering (Theano).
# see this: https://stackoverflow.com/questions/39815518/keras-maxpooling2d-layer-gives-valueerror
if K.image_data_format() == "channels_first":
    trainData = trainData.reshape((trainData.shape[0], 1, 32, 32))
    validationData = validationData.reshape(
        (validationData.shape[0], 1, 32, 32))
    testData = testData.reshape((testData.shape[0], 1, 32, 32))

# handle matrix for when Keras is using "channels last" ordering (Tensorflow).
else:
    trainData = trainData.reshape((trainData.shape[0], 32, 32, 1))
    # NOTE(review): this snippet appears truncated/garbled — the accuracy
    # reporting below is unrelated to the reshape branch, and neither
    # `accuracy` nor `log` is defined at this point (`log` is only opened
    # further down in the file). Restore the missing lines from the
    # original example before running.
    print(
        "\nAccuracy: (true positives + true negatives) / (test size) * 100 = {}%"
        .format(accuracy))
    log.write(
        "\nAccuracy: (true positives + true negatives) / (test size) * 100 = {}% \n"
        .format(accuracy))


# Open a fresh log file; every console message below is mirrored into it.
# NOTE(review): the handle is never closed in this snippet — consider a
# `with open(...)` block once the full script is restored.
log = open("log.txt", "w")
log.write('#######################START########################\n')

print('Reading train dataset (Train 60000.cdb)...')
log.write('Reading train dataset (Train 60000.cdb)...\n')
# Flattened 32x32 images (reshape=True) with integer labels (one_hot=False).
train_images, train_labels = read_hoda_dataset(
    dataset_path='./DigitDB/Train 60000.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True)

print('Reading test dataset (Test 20000.cdb)...')
log.write('Reading test dataset (Test 20000.cdb)...\n')
test_images, test_labels = read_hoda_dataset(
    dataset_path='./DigitDB/Test 20000.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True)

print('Reading remaining samples dataset (RemainingSamples.cdb)...')
log.write('Reading remaining samples dataset (RemainingSamples.cdb)...\n')
# NOTE(review): this call is cut off by the snippet boundary below — its
# arguments are missing from this scrape.
remaining_images, remaining_labels = read_hoda_dataset(
# ----- Example #6 (scraped snippet boundary) -----
from HodaDatasetReader import read_hoda_dataset
import pickle

# Load all three Hoda splits keeping images in their original (unflattened)
# layout (reshape=False).
train_images, train_labels = read_hoda_dataset('./DigitDB/Train 60000.cdb',
                                               reshape=False)
test_images, test_labels = read_hoda_dataset('./DigitDB/Test 20000.cdb',
                                             reshape=False)
remaining_images, remaining_labels = read_hoda_dataset(
    './DigitDB/RemainingSamples.cdb', reshape=False)
# because of the dataset, it's better to shuffle the dataset to increase accuracy and avoid the network from memorising
# NOTE(review): `shuffle` and `np` are not imported in this snippet —
# presumably sklearn.utils.shuffle and numpy; confirm the missing imports
# before running (sklearn.utils.shuffle permutes both arrays in unison).
train_images, train_labels = shuffle(np.array(train_images),
                                     np.array(train_labels))
test_images, test_labels = shuffle(np.array(test_images),
                                   np.array(test_labels))
remaining_images, remaining_labels = shuffle(np.array(remaining_images),
                                             np.array(remaining_labels))

# In order to save dataset to pickle.
# BUG FIX: the original iterated over the *names* and called
# `pickle.dump(i, pickle_out)`, which pickled the six name strings
# ('train_images', ...) instead of the arrays themselves. Map each name to
# its object and dump the object. `listNames` is kept in case later code
# references it.
listNames = [
    'train_images', 'train_labels', 'test_images', 'test_labels',
    'remaining_images', 'remaining_labels'
]
datasets = {
    'train_images': train_images,
    'train_labels': train_labels,
    'test_images': test_images,
    'test_labels': test_labels,
    'remaining_images': remaining_images,
    'remaining_labels': remaining_labels,
}
for name, data in datasets.items():
    # `with` guarantees the file handle is closed even if dump raises.
    with open("DigitDB/{}.pickle".format(name), 'wb') as pickle_out:
        pickle.dump(data, pickle_out)

# Load the dataset after saving it.
# BUG FIX: the original left `pickle_in` open (resource leak); a context
# manager closes the handle deterministically.
with open("DigitDB/train_images.pickle", "rb") as pickle_in:
    train_images = pickle.load(pickle_in)