def train(self, x_train, y_train, max_epoch):
    print('###Train###')
    self.setup(True)
    self.sess.run(tf.global_variables_initializer())

    # Determine how many batches in an epoch
    num_samples = x_train.shape[0]
    num_batches = int(num_samples / self.conf.batch_size)

    print('---Run...')
    for epoch in range(1, max_epoch + 1):
        start_time = time.time()
        # Shuffle
        shuffle_index = np.random.permutation(num_samples)
        curr_x_train = x_train[shuffle_index]
        curr_y_train = y_train[shuffle_index]

        ### YOUR CODE HERE
        # Set the learning rate for this epoch:
        # divide the initial learning rate by 10 every 10 epochs.
        # This is computed once per epoch, not per batch.
        starter_learning_rate = 0.1
        learning_rate = starter_learning_rate * (0.1 ** (epoch // 10))
        ### END CODE HERE

        loss_value = []
        for i in range(num_batches):
            ### YOUR CODE HERE
            # Construct the current batch.
            # Don't forget to use "parse_record" to perform data preprocessing.
            batch_start = i * self.conf.batch_size
            batch_end = min((i + 1) * self.conf.batch_size, num_samples)
            x_batch = curr_x_train[batch_start:batch_end]
            x_batch = list(map(lambda x: parse_record(x, True), x_batch))
            y_batch = curr_y_train[batch_start:batch_end]
            ### END CODE HERE

            # Run. Feed the scheduled learning rate rather than a
            # hard-coded constant, so the decay above takes effect.
            feed_dict = {self.inputs: x_batch,
                         self.labels: y_batch,
                         self.learning_rate: learning_rate}
            loss, _ = self.sess.run(
                [self.losses, self.train_op], feed_dict=feed_dict)
            print('Batch {:d}/{:d} Loss {:.6f}'.format(i, num_batches, loss),
                  end='\r', flush=True)

        duration = time.time() - start_time
        print('Epoch {:d} Loss {:.6f} Duration {:.3f} seconds.'.format(
            epoch, loss, duration))

        if epoch % self.conf.save_interval == 0:
            self.save(self.saver, epoch)
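# The schedule above is one concrete instance of staircase decay. As a
# minimal, self-contained sketch (the helper name and defaults are
# illustrative assumptions, not part of the assignment API), the same rule
# can be factored out and checked on its own:
def staircase_lr(epoch, base_lr=0.1, decay_factor=0.1, decay_every=10):
    """Return base_lr scaled down by decay_factor every decay_every epochs."""
    return base_lr * (decay_factor ** (epoch // decay_every))

# Example: staircase_lr(1) == 0.1, staircase_lr(10) == 0.01,
# staircase_lr(25) == 0.001.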
def load_data(data_dir):
    """Load the CIFAR-10 dataset.

    Args:
        data_dir: A string. The directory where data batches are stored.

    Returns:
        x_train: A numpy array of shape [50000, 3072]. (dtype=np.float32)
        y_train: A numpy array of shape [50000,]. (dtype=np.int32)
        x_test: A numpy array of shape [10000, 3072]. (dtype=np.float32)
        y_test: A numpy array of shape [10000,]. (dtype=np.int32)
    """
    ### YOUR CODE HERE
    # Load the five training batches in order, then the test batch.
    # Iterating over os.listdir directly is fragile: the listing order is
    # arbitrary (so the first 50000 rows need not be the training set) and
    # the directory also contains non-batch files such as batches.meta.
    def _load_batch(file_name):
        with open(os.path.join(data_dir, file_name), 'rb') as f:
            data_dict = pickle.load(f, encoding='bytes')
        return (np.array(data_dict[b'data'], dtype=np.float32),
                np.array(data_dict[b'labels'], dtype=np.int32))

    train_parts = [_load_batch('data_batch_{}'.format(i)) for i in range(1, 6)]
    x_train = np.concatenate([x for x, _ in train_parts], axis=0)
    y_train = np.concatenate([y for _, y in train_parts])
    x_test, y_test = _load_batch('test_batch')
    ### END CODE HERE

    # Preprocessing with parse_record is deferred to training/testing time,
    # so the returned arrays keep the documented [N, 3072] shape and the
    # test set is never run through training-time augmentation.
    return x_train, y_train, x_test, y_test
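# A minimal usage sketch, assuming the standard extracted CIFAR-10 python
# directory layout ('cifar-10-batches-py' is an assumed path):
#
#   x_train, y_train, x_test, y_test = load_data('cifar-10-batches-py')
#   print(x_train.shape)  # (50000, 3072)
#   print(x_test.shape)   # (10000, 3072)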
def test_or_validate(self, x, y, checkpoint_num_list):
    print('###Test or Validation###')
    self.setup(False)
    self.sess.run(tf.global_variables_initializer())

    # Load each checkpoint and evaluate it.
    for checkpoint_num in checkpoint_num_list:
        checkpointfile = self.conf.modeldir + '/model.ckpt-' + str(checkpoint_num)
        self.load(self.loader, checkpointfile)

        preds = []
        for i in tqdm(range(x.shape[0])):
            ### YOUR CODE HERE
            # Preprocess one image (no augmentation at test time) and
            # feed it as a batch of size 1.
            src = parse_record(x[i], False).reshape((1, 32, 32, 3))
            preds.append(self.sess.run(self.preds,
                                       feed_dict={self.inputs: src}))
            ### END CODE HERE

        preds = np.array(preds).reshape(y.shape)
        print('Test accuracy: {:.4f}'.format(np.sum(preds == y) / y.shape[0]))
def test_or_validate(self, x, y, checkpoint_num_list):
    print('###Test or Validation###')
    self.setup(False)
    self.sess.run(tf.global_variables_initializer())

    # Load each checkpoint and evaluate it.
    for checkpoint_num in checkpoint_num_list:
        checkpointfile = self.conf.modeldir + '/model.ckpt-' + str(checkpoint_num)
        self.load(self.loader, checkpointfile)

        preds = []
        logits = []
        for i in tqdm(range(x.shape[0])):
            ### YOUR CODE HERE
            # Feed a batch of size 1; the label is wrapped in a list so it
            # matches the batched labels placeholder. Fetch preds and
            # logits in a single run to avoid a second forward pass.
            feed_dict = {self.inputs: [parse_record(np.array(x[i]), False)],
                         self.labels: [y[i]]}
            pred, logit = self.sess.run([self.preds, self.logits],
                                        feed_dict=feed_dict)
            preds.append(pred)
            logits.append(logit)
            ### END CODE HERE

        preds = np.array(preds).reshape(y.shape)
        print('Test accuracy: {:.4f}'.format(np.sum(preds == y) / y.shape[0]))
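# Both variants above run inference one image at a time, which wastes GPU
# throughput. A minimal sketch of batched evaluation under the same assumed
# attributes (self.sess, self.preds, self.inputs); the helper name and the
# batch_size default are hypothetical, not part of the original code:
def _predict_in_batches(self, x, batch_size=128):
    """Return predictions for all of x, evaluated batch_size images at a time."""
    preds = []
    for start in range(0, x.shape[0], batch_size):
        # Preprocess a slice without augmentation and feed it as one batch.
        batch = [parse_record(rec, False) for rec in x[start:start + batch_size]]
        preds.append(self.sess.run(self.preds, feed_dict={self.inputs: batch}))
    return np.concatenate(preds)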
def train(self, x_train, y_train, max_epoch):
    print('###Train###')
    self.setup(True)
    self.sess.run(tf.global_variables_initializer())

    # Determine how many batches in an epoch
    num_samples = x_train.shape[0]
    num_batches = int(num_samples / self.conf.batch_size)

    print('---Run...')
    for epoch in range(1, max_epoch + 1):
        start_time = time.time()
        # Shuffle
        shuffle_index = np.random.permutation(num_samples)
        curr_x_train = x_train[shuffle_index]
        curr_y_train = y_train[shuffle_index]

        ### YOUR CODE HERE
        # Set the learning rate for this epoch: warm up at 0.01 for the
        # first epoch, then drop from 0.1 to 0.01 to 0.001 in stages.
        if epoch == 1:
            learning_rate = 0.01
        elif epoch < 91:
            learning_rate = 0.1
        elif epoch < 136:
            learning_rate = 0.01
        else:
            learning_rate = 0.001
        ### END CODE HERE

        loss_value = []
        for i in range(num_batches):
            ### YOUR CODE HERE
            # Construct the current batch from the *shuffled* copies;
            # slicing x_train/y_train directly would silently undo the
            # shuffle above.
            x_batch = curr_x_train[i * self.conf.batch_size:
                                   (i + 1) * self.conf.batch_size, :]
            x_batch = np.concatenate(
                [parse_record(x, training=True).reshape(-1, 32, 32, 3)
                 for x in x_batch], axis=0)

            # Make the y labels one-hot. A row-indexed identity matrix
            # avoids an inner loop whose index would shadow the batch
            # index i.
            y_batch_raw = curr_y_train[i * self.conf.batch_size:
                                       (i + 1) * self.conf.batch_size]
            y_batch = np.eye(10)[y_batch_raw.astype(int)]
            ### END CODE HERE

            # Run
            feed_dict = {
                self.inputs: x_batch,
                self.labels: y_batch,
                self.learning_rate: learning_rate
            }
            loss, _, _ = self.sess.run(
                [self.losses, self.train_op, self.extra_update_ops],
                feed_dict=feed_dict)
            loss_value.append(loss)
            print('Batch {:d}/{:d} Loss {:.6f}'.format(i, num_batches, loss),
                  end='\r', flush=True)

        duration = time.time() - start_time
        sum_loss = sum(loss_value)
        print('Epoch {:d} Loss {:.6f} Duration {:.3f} seconds.'.format(
            epoch, sum_loss, duration))

        if epoch % self.conf.save_interval == 0:
            self.save(self.saver, epoch)
def train(self, x_train, y_train, max_epoch):
    print('###Train###')
    self.setup(True)
    self.sess.run(tf.global_variables_initializer())

    # Determine how many batches in an epoch
    num_samples = x_train.shape[0]
    num_batches = int(num_samples / self.conf.batch_size)

    print('---Run...')
    for epoch in range(1, max_epoch + 1):
        start_time = time.time()
        # Shuffle
        shuffle_index = np.random.permutation(num_samples)
        curr_x_train = x_train[shuffle_index]
        curr_y_train = y_train[shuffle_index]

        ### YOUR CODE HERE
        # Set the learning rate for this epoch: divide the initial rate
        # by 10 every 10 epochs. Decaying inside the batch loop (as the
        # original `learning_rate /= 10` did) would shrink the rate on
        # every batch of each tenth epoch rather than once per epoch.
        learning_rate = 0.1 * (0.1 ** (epoch // 10))
        ### END CODE HERE

        loss_value = []
        preds = []
        logits = []
        for i in range(num_batches):
            ### YOUR CODE HERE
            # Construct the current batch.
            # Don't forget to use "parse_record" to perform data preprocessing.
            x_batch = curr_x_train[i * self.conf.batch_size:
                                   min((i + 1) * self.conf.batch_size, num_samples)]
            x_batch = list(map(lambda x: parse_record(x, True), x_batch))
            y_batch = curr_y_train[i * self.conf.batch_size:
                                   min((i + 1) * self.conf.batch_size, num_samples)]
            ### END CODE HERE

            # Run. Fetch preds and logits in the same call as the update
            # instead of re-running the forward pass in separate sess.run calls.
            feed_dict = {self.inputs: x_batch,
                         self.labels: y_batch,
                         self.learning_rate: learning_rate}
            loss, _, batch_preds, batch_logits = self.sess.run(
                [self.losses, self.train_op, self.preds, self.logits],
                feed_dict=feed_dict)
            preds.append(batch_preds)
            logits.append(batch_logits)
            print('Batch {:d}/{:d} Loss {:.6f}'.format(i, num_batches, loss),
                  end='\r', flush=True)

        duration = time.time() - start_time
        print('Epoch {:d} Loss {:.6f} Duration {:.3f} seconds.'.format(
            epoch, loss, duration))

        # Report training accuracy over the samples actually seen this
        # epoch (the last partial batch, if any, is skipped above).
        preds = np.concatenate(preds)
        num_seen = preds.shape[0]
        correct = np.sum(preds == curr_y_train[:num_seen])
        logits = np.array(logits)  # retained for debugging
        print('Train accuracy: {:.4f}, {:d}/{:d}'.format(
            correct / num_seen, correct, num_seen))

        if epoch % self.conf.save_interval == 0:
            self.save(self.saver, epoch)
def train(self, x_train, y_train, max_epoch):
    print('###Train###')
    self.model_setup(True)
    self.sess.run(tensorflow.compat.v1.global_variables_initializer())

    num_samples = x_train.shape[0]
    num_batches = int(num_samples / self.batch_size)

    # Copy the original data into arrays with room for num_batches extra
    # mixup examples appended at the end.
    x_batch_new = np.zeros((num_samples + num_batches, 3072))
    x_batch_new[:num_samples] = x_train
    y_batch_new = np.zeros((y_train.shape[0] + num_batches, 10))
    y_batch_new[:y_train.shape[0]] = y_train

    ## Mixup training: form one new virtual example per batch by linearly
    ## interpolating between two distinct random examples, to improve
    ## regularization. (Requires `import random` at module level.)
    for i in range(num_batches):
        t = np.random.beta(0.1, 0.1)
        rand_index1 = random.randint(i * self.batch_size,
                                     (i + 1) * self.batch_size - 1)
        while True:
            rand_index2 = random.randint(i * self.batch_size,
                                         (i + 1) * self.batch_size - 1)
            if rand_index2 != rand_index1:
                break
        x_batch_new[num_samples + i] = (x_train[rand_index1] * t
                                        + x_train[rand_index2] * (1 - t))
        y_batch_new[y_train.shape[0] + i] = (y_train[rand_index1] * t
                                             + y_train[rand_index2] * (1 - t))

    # Shuffle the augmented dataset and preprocess the images.
    shuffle_index = np.random.permutation(num_samples + num_batches)
    curr_x_train = x_batch_new[shuffle_index]
    curr_y_train = y_batch_new[shuffle_index]
    x_train = np.asarray([parse_record(rec, True) for rec in curr_x_train])
    print("Data parsed")

    # Keras callbacks: learning-rate schedule and best-model checkpointing.
    lr_scheduler = LearningRateScheduler(lr_schedule)
    filepath = '../saved_models/model.h5'
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='accuracy',
                                 verbose=1,
                                 save_best_only=True)
    callbacks = [lr_scheduler, checkpoint]

    datagen = ImageDataGenerator(
        featurewise_center=True,             # set input mean to 0 over the dataset
        samplewise_center=False,             # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,                 # apply ZCA whitening
        zca_epsilon=1e-06,                   # epsilon for ZCA whitening
        rotation_range=0,                    # randomly rotate images (deg 0 to 180)
        width_shift_range=0.1,               # randomly shift images horizontally
        height_shift_range=0.1,              # randomly shift images vertically
        shear_range=0.,                      # range for random shear
        zoom_range=0.,                       # range for random zoom
        channel_shift_range=0.,              # range for random channel shifts
        fill_mode='nearest',                 # fill mode outside input boundaries
        cval=0.,                             # value used for fill_mode = "constant"
        horizontal_flip=True,                # randomly flip images horizontally
        vertical_flip=False,                 # randomly flip images vertically
        rescale=None,                        # rescaling factor (applied first)
        preprocessing_function=None,         # function applied to each input
        data_format=None,                    # "channels_first" or "channels_last"
        validation_split=0.2)                # fraction of images reserved for validation

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    self.model.fit_generator(datagen.flow(x_train, curr_y_train,
                                          batch_size=self.batch_size),
                             epochs=max_epoch,
                             verbose=1,
                             workers=4,
                             callbacks=callbacks)
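# Mixup can also be applied per-batch and fully vectorized instead of
# generating one virtual example per batch up front. A minimal NumPy
# sketch, assuming one-hot labels and a mixing coefficient drawn from
# Beta(alpha, alpha) as in the loop above; the function name and the
# alpha default are illustrative assumptions:
def mixup_batch(x, y, alpha=0.1):
    """Return a mixup of the batch with a shuffled copy of itself."""
    lam = np.random.beta(alpha, alpha)
    perm = np.random.permutation(x.shape[0])
    x_mixed = lam * x + (1 - lam) * x[perm]
    y_mixed = lam * y + (1 - lam) * y[perm]
    return x_mixed, y_mixed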
def train(self, x_train, y_train, max_epoch):
    print('###Train###')
    self.setup(True)
    self.sess.run(tf.global_variables_initializer())

    # Determine how many batches in an epoch
    num_samples = x_train.shape[0]
    batchsize = self.conf.batch_size
    num_batches = int(num_samples / batchsize)

    print('---Run...')
    for epoch in range(1, max_epoch + 1):
        start_time = time.time()
        # Shuffle
        shuffle_index = np.random.permutation(num_samples)
        curr_x_train = x_train[shuffle_index]
        curr_y_train = y_train[shuffle_index]

        ### YOUR CODE HERE
        # Set the learning rate for this epoch: warm up at 1e-2 for the
        # first two epochs, then decay by 10x after 32k iterations and
        # by 100x after 48k iterations.
        learning_rate = 1e-1 if epoch > 2 else 1e-2
        if epoch * num_batches > 48e3:
            learning_rate *= 1e-2
        elif epoch * num_batches > 32e3:
            learning_rate *= 1e-1
        ### END CODE HERE

        loss_value = []
        for i in range(num_batches):
            ### YOUR CODE HERE
            # Construct the current batch.
            # Don't forget to use "parse_record" to perform data preprocessing.
            start_ix = i * batchsize
            end_ix = start_ix + batchsize
            if end_ix > curr_x_train.shape[0]:
                # Shouldn't happen because of how num_batches is
                # calculated, but guard anyway.
                continue
            x_batch = [parse_record(curr_x_train[ix], True)
                       for ix in range(start_ix, end_ix)]
            y_batch = curr_y_train[start_ix:end_ix]
            ### END CODE HERE

            # Run
            feed_dict = {
                self.inputs: x_batch,
                self.labels: y_batch,
                self.learning_rate: learning_rate
            }
            loss, _ = self.sess.run([self.losses, self.train_op],
                                    feed_dict=feed_dict)
            print('Batch {:d}/{:d} Loss {:.6f}'.format(i, num_batches, loss),
                  end='\r', flush=True)

        duration = time.time() - start_time
        print('Epoch {:d} Loss {:.6f} Duration {:.3f} seconds.'.format(
            epoch, loss, duration))

        if epoch % self.conf.save_interval == 0:
            self.save(self.saver, epoch)
def train(self, x_train, y_train, max_epoch):
    print('###Train###')
    self.setup(True)
    print('Setup is done')
    self.sess.run(tf.global_variables_initializer())

    # Determine how many batches in an epoch
    num_samples = x_train.shape[0]
    num_batches = int(num_samples / self.conf.batch_size)

    print('---Run...')
    for epoch in range(1, max_epoch + 1):
        start_time = time.time()
        # Shuffle
        shuffle_index = np.random.permutation(num_samples)
        curr_x_train = x_train[shuffle_index]
        curr_y_train = y_train[shuffle_index]

        ### YOUR CODE HERE
        # Set the learning rate for this epoch: divide the initial rate
        # by 10 at epoch 100 and again at epoch 150.
        initial = 0.01
        if epoch < 100:
            learning_rate = initial
        elif epoch < 150:
            learning_rate = initial / 10
        else:
            learning_rate = initial / 100
        ### END CODE HERE

        loss_value = []
        for i in range(num_batches):
            ### YOUR CODE HERE
            # Construct the current batch.
            # Don't forget to use "parse_record" to perform data preprocessing.
            x_batch = curr_x_train[self.conf.batch_size * i:
                                   self.conf.batch_size * (i + 1)]
            y_batch = curr_y_train[self.conf.batch_size * i:
                                   self.conf.batch_size * (i + 1)]
            x_batch_new = np.zeros((np.shape(x_batch)[0], 32, 32, 3))
            for k in range(np.shape(x_batch)[0]):
                x_batch_new[k] = parse_record(x_batch[k], True)
            ### END CODE HERE

            # Run
            feed_dict = {
                self.inputs: x_batch_new,
                self.labels: y_batch,
                self.learning_rate: learning_rate
            }
            loss, _ = self.sess.run([self.losses, self.train_op],
                                    feed_dict=feed_dict)
            print('Batch {:d}/{:d} Loss {:.6f}'.format(i, num_batches, loss),
                  end='\r', flush=True)

        duration = time.time() - start_time
        print('Epoch {:d} Loss {:.6f} Duration {:.3f} seconds.'.format(
            epoch, loss, duration))

        if epoch % self.conf.save_interval == 0:
            self.save(self.saver, epoch)