(x_train, y_train),(x_test, y_test) = cifar10.load_data()
x_train = x_train.astype(np.float32)
x_test  = x_test.astype(np.float32)

mean =  5./6. * x_train.mean(axis=(0,1,2)) + 1./6. * x_test.mean(axis=(0,1,2))
sd   = (5./6. * x_train.var(axis=(0,1,2))  + 1./6. * x_test.var(axis=(0,1,2)))**0.5  
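# The 5/6 and 1/6 weights above are the CIFAR-10 split sizes (50,000 train and
# 10,000 test images out of 60,000), so mean/sd are per-channel statistics over
# the full dataset. The standardization itself, presumably (x - mean) / sd, is
# not applied anywhere in this excerpt.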

train_dataset_base = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset_base.shuffle(50000, seed=3).batch(128)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(128)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model_base = VGG(n_class=10, input_shape=(32, 32, 3))
model_base.save_weights('base_CNN_weights.h5')
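# Save the freshly initialized weights so the regularized model built further
# down can be restored to exactly the same starting point.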

train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
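# batch_evaluate_train_loss and batch_evaluate_test_loss are not defined in
# this excerpt. A minimal sketch of what they are assumed to do (a forward
# pass that accumulates the mean loss, with no gradient update):
@tf.function
def batch_evaluate_train_loss(model, x, y):
  train_loss(loss_object(y, model(x, training=False)))

@tf.function
def batch_evaluate_test_loss(model, x, y):
  test_loss(loss_object(y, model(x, training=False)))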

# Iterate with batch-local names so x_train / x_test keep the full arrays.
for (x_batch, y_batch) in train_dataset:
  batch_evaluate_train_loss(model_base, x_batch, y_batch)

for (x_batch, y_batch) in test_dataset:
  batch_evaluate_test_loss(model_base, x_batch, y_batch)
 
start_losses = (float(train_loss.result()),
                float(test_loss.result()))

train_loss.reset_states()
test_loss.reset_states()

model = VGG(n_class=10,
            input_shape=(32, 32, 3),
            kernel_pen=1e-4,
            bias_pen=1e-4)
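# Restore the initial weights saved above so the regularized model
# (kernel_pen / bias_pen) starts from the same initialization as model_base.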
model.load_weights('base_CNN_weights.h5')

start_loss_train, start_loss_val = load_result('losses_CNN')
VGG_Net = load_result('VGG_performances_result_final')

optm = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)

VGG_Net['Adam']['Keras']['loss'].append(start_loss_train)
VGG_Net['Adam']['Keras']['val_loss'].append(start_loss_val)

train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')
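# The custom training loop is not shown in this excerpt. A minimal sketch of
# a train step consistent with the objects defined above (the function name
# and loop structure are assumptions, not the original implementation):
@tf.function
def train_step(x, y):
  with tf.GradientTape() as tape:
    logits = model(x, training=True)
    # Data loss plus the kernel/bias regularization penalties collected by the model.
    loss = loss_object(y, logits) + tf.add_n(model.losses)
  grads = tape.gradient(loss, model.trainable_variables)
  optm.apply_gradients(zip(grads, model.trainable_variables))
  train_loss(loss)
  train_accuracy(y, logits)

# PyTorch pipeline: data loaders, model selection and training hyperparameters.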
batch_size = args.bsize

train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=8, pin_memory=True)
valid_dl = DataLoader(valid_ds, batch_size * 4, num_workers=8, pin_memory=True)
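# The validation loader uses a larger batch because no gradients are stored
# during evaluation. get_default_device, to_device and DeviceDataLoader are
# not defined in this excerpt; a sketch of the usual helpers they are assumed
# to correspond to (requires `import torch`):
def get_default_device():
    return torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def to_device(data, device):
    # Recursively move tensors (or lists/tuples of tensors) to the device.
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader:
    """Wrap a DataLoader so every batch is moved to the target device."""
    def __init__(self, dl, device):
        self.dl, self.device = dl, device
    def __iter__(self):
        for batch in self.dl:
            yield to_device(batch, self.device)
    def __len__(self):
        return len(self.dl)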

device = get_default_device()
print(device)
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(valid_dl, device)

if args.model == 'res18':
    model = Resnet.resnet18(pretrained=args.pretrained)
elif args.model == 'res34':
    model = Resnet.resnet34(pretrained=args.pretrained)
elif args.model == 'vgg16':
    model = VGG.vgg16_bn(pretrained=args.pretrained)
elif args.model == 'googlenet':
    model = GoogleNet.GoogLeNet(num_classes=5, init_weights=True)
else:
    print("Model Fault!")
    exit(-1)

print('Using GPU: ' + str(np.argmax(memory_gpu)))
print(model)
to_device(model, device)

# Training steps
epochs = args.epoch
max_lr = args.lr
grad_clip = args.grad_clip
weight_decay = args.weight_decay
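# The PyTorch training loop is not shown in this excerpt. A sketch of how
# these hyperparameters are typically consumed (one-cycle LR schedule with
# gradient clipping; the optimizer choice and loop structure are assumptions,
# not the original implementation):
optimizer = torch.optim.Adam(model.parameters(), lr=max_lr, weight_decay=weight_decay)
sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr,
                                            epochs=epochs,
                                            steps_per_epoch=len(train_dl))
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(epochs):
    model.train()
    for images, labels in train_dl:
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        if grad_clip:
            # Clip gradients element-wise to stabilize training.
            torch.nn.utils.clip_grad_value_(model.parameters(), grad_clip)
        optimizer.step()
        sched.step()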