def test_generator_wraparound(self):
    data = ModelData()
    nb_samples = 3 * BATCH_SIZE
    train, valid, test = data.get_data_tuples_generator(
        nb_samples=(nb_samples, nb_samples, nb_samples))
    # First batch
    train_batch_1 = next(train)
    valid_batch_1 = next(valid)
    test_batch_1 = next(test)
    # Second and third batches
    for _ in range(2):
        self.check_generator_length(train=train, valid=valid, test=test)
    # Fourth batch should wrap back to the first batch
    train_batch_4 = next(train)
    valid_batch_4 = next(valid)
    test_batch_4 = next(test)
    self.assertTrue(
        np.array_equal(train_batch_1.X_train, train_batch_4.X_train))
    self.assertTrue(
        np.array_equal(train_batch_1.y_train, train_batch_4.y_train))
    self.assertTrue(
        np.array_equal(valid_batch_1.X_valid, valid_batch_4.X_valid))
    self.assertTrue(
        np.array_equal(valid_batch_1.y_valid, valid_batch_4.y_valid))
    self.assertTrue(
        np.array_equal(test_batch_1.X_test, test_batch_4.X_test))
    self.assertTrue(
        np.array_equal(test_batch_1.y_test, test_batch_4.y_test))
def test_generator_full(self):
    data = ModelData()
    train, valid, test = data.get_data_tuples_generator()
    # First batch
    train_batch_1 = next(train)
    valid_batch_1 = next(valid)
    test_batch_1 = next(test)
    # All batches until the end of each split
    self.check_train_generator_length(train)
    self.check_valid_generator_length(valid)
    self.check_test_generator_length(test)
    # Next batch should wrap back to the first batch
    train_batch_begin = next(train)
    valid_batch_begin = next(valid)
    test_batch_begin = next(test)
    self.assertTrue(
        np.array_equal(train_batch_1.X_train, train_batch_begin.X_train))
    self.assertTrue(
        np.array_equal(train_batch_1.y_train, train_batch_begin.y_train))
    self.assertTrue(
        np.array_equal(valid_batch_1.X_valid, valid_batch_begin.X_valid))
    self.assertTrue(
        np.array_equal(valid_batch_1.y_valid, valid_batch_begin.y_valid))
    self.assertTrue(
        np.array_equal(test_batch_1.X_test, test_batch_begin.X_test))
    self.assertTrue(
        np.array_equal(test_batch_1.y_test, test_batch_begin.y_test))
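# check_generator_length is referenced by test_generator_wraparound above but its
# body is not part of this excerpt. A minimal sketch of one plausible
# implementation, assuming it advances each generator by one batch and checks the
# batch sizes; the assertions in the real helper may differ.
def check_generator_length(self, train, valid, test):
    # Draw one batch from each generator and confirm the leading dimension
    # matches the configured batch size.
    train_batch = next(train)
    valid_batch = next(valid)
    test_batch = next(test)
    self.assertEqual(train_batch.X_train.shape[0], BATCH_SIZE)
    self.assertEqual(valid_batch.X_valid.shape[0], BATCH_SIZE)
    self.assertEqual(test_batch.X_test.shape[0], BATCH_SIZE)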
def test_shrink_size(self):
    data = ModelData()
    data.set_shrink_size(train=TRAIN_SHRINK, valid=VALID_SHRINK, test=TEST_SHRINK)
    train = data.get_train_tuple()
    # Floor the shrunken count so the expectation matches nb_train_samples().
    self.assertEqual(train.X_train.shape,
                     (floor(TRAIN_SHRINK * TRAIN_SAMPLES), LENGTH, WIDTH))
def test_data_tuples(self):
    data = ModelData()
    train, valid, test = data.get_data_tuples()
    self.assertEqual(train.X_train.shape, (TRAIN_SAMPLES, LENGTH, WIDTH))
    self.assertEqual(train.y_train.shape, (TRAIN_SAMPLES, CLASSES))
    self.assertEqual(valid.X_valid.shape, (VALID_SAMPLES, LENGTH, WIDTH))
    self.assertEqual(valid.y_valid.shape, (VALID_SAMPLES, CLASSES))
    self.assertEqual(test.X_test.shape, (TEST_SAMPLES, LENGTH, WIDTH))
    self.assertEqual(test.y_test.shape, (TEST_SAMPLES, CLASSES))
def test_individual_tuples(self):
    data = ModelData()
    train = data.get_train_tuple()
    valid = data.get_valid_tuple()
    test = data.get_test_tuple()
    self.assertEqual(train.X_train.shape, (TRAIN_SAMPLES, LENGTH, WIDTH))
    self.assertEqual(train.y_train.shape, (TRAIN_SAMPLES, CLASSES))
    self.assertEqual(valid.X_valid.shape, (VALID_SAMPLES, LENGTH, WIDTH))
    self.assertEqual(valid.y_valid.shape, (VALID_SAMPLES, CLASSES))
    self.assertEqual(test.X_test.shape, (TEST_SAMPLES, LENGTH, WIDTH))
    self.assertEqual(test.y_test.shape, (TEST_SAMPLES, CLASSES))
def test_setters_getters(self):
    data = ModelData()
    data.set_shrink_size(train=TRAIN_SHRINK, valid=VALID_SHRINK, test=TEST_SHRINK)
    self.assertEqual(data.nb_train_samples(),
                     floor(TRAIN_SHRINK * TRAIN_SAMPLES))
    self.assertEqual(data.nb_valid_samples(),
                     floor(VALID_SHRINK * VALID_SAMPLES))
    self.assertEqual(data.nb_test_samples(),
                     floor(TEST_SHRINK * TEST_SAMPLES))
def test_open_file(self):
    data = ModelData()
    data.set_shrink_size(train=1, valid=1, test=1)
    train = data.open_train_file()
    valid = data.open_valid_file()
    test = data.open_test_file()
    # The training .mat file is read through h5py, so open it read-only.
    train_ = h5py.File('data/processed/train.mat', 'r')
    valid_ = scipy.io.loadmat('data/processed/valid.mat')
    test_ = scipy.io.loadmat('data/processed/test.mat')
    self.assertEqual(train.keys(), train_.keys())
    self.assertEqual(valid.keys(), valid_.keys())
    self.assertEqual(test.keys(), test_.keys())
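# Not part of the original excerpt: a minimal entry point for running the tests
# above directly, assuming they are methods of a unittest.TestCase subclass and
# that `unittest` is imported at the top of the module.
if __name__ == '__main__':
    unittest.main()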
def main(argv):
    # Set up argument parsing
    parser = argparse.ArgumentParser(
        description='Test a Keras model on genetic sequences ' +
                    'to derive epigenetic mechanisms')
    parser.add_argument('model_name', metavar='MODEL_NAME',
                        help="The unique name of the model to create")
    # parser.add_argument('create_fn', metavar='MODEL_FUNC',
    #                     help="The name of the function in src/models/create_models "
    #                          "to create a model with")
    parser.add_argument('weights_file', metavar='WEIGHTS_FILE',
                        help="The file (.hdf5) to store the model's weights")
    parser.add_argument(
        'json_file', metavar='JSON_FILE',
        help="The file (.json) to store the model's architecture in JSON")
    parser.add_argument(
        'yaml_file', metavar='YAML_FILE',
        help="The file (.yaml) to store the model's architecture in YAML")
    parser.add_argument('log_file', metavar='LOG_FILE',
                        help="The file (.csv) to store the model's epoch logs")
    parser.add_argument(
        'tensorboard_dir', metavar='TB_DIR',
        help="The directory to store the model's tensorboard data "
             "(if using Tensorflow backend)")
    args = parser.parse_args(argv)

    # Configure the tensorflow session to not run out of memory
    if K.backend() == 'tensorflow':
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.85
        config.gpu_options.allocator_type = 'BFC'
        config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

    data = ModelData()
    model = Model(name=args.model_name)
    print(args.json_file)
    # Temporary solution to running a model under a new name
    model.load_from(json_file=args.json_file, yaml_file=args.yaml_file)
    model.load_weights(weights_file=args.weights_file)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    _log.info('Retrieving test data...')
    test = data.get_test_tuple()

    log_utils.print_date_time(_log)
    start = time.time()

    _log.info('Testing model...')
    model.test(test=test)
    log_utils.print_elapsed_time(_log, start=start, end=time.time())

    _log.info('Creating predictions...')
    y_predict = model.predict(test.X_test)
    log_utils.print_elapsed_time(_log, start=start, end=time.time())

    # Save the predictions for later analysis (use a named dict rather than
    # shadowing the built-in `dict`).
    predictions = {'predictions': np.array(y_predict)}
    scipy.io.savemat(
        'models/predictions/y_predict_' + args.model_name + '.mat',
        predictions)
    log_utils.print_date_time(_log)
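# Not part of the original excerpt: the usual entry point for a script like this,
# assuming it is run directly and the command-line arguments are forwarded to
# main(). Requires `import sys` at the top of the file.
if __name__ == '__main__':
    main(sys.argv[1:])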
def main(argv):
    # Set up argument parsing
    parser = argparse.ArgumentParser(
        description='Run a Keras model on genetic sequences ' +
                    'to derive epigenetic mechanisms')
    parser.add_argument('model_name', metavar='MODEL_NAME',
                        help="The unique name of the model to create")
    parser.add_argument(
        'create_fn', metavar='MODEL_FUNC',
        help="The name of the function in src/models/create_models "
             "to create a model with")
    parser.add_argument('weights_file', metavar='WEIGHTS_FILE',
                        help="The file (.hdf5) to store the model's weights")
    parser.add_argument(
        'json_file', metavar='JSON_FILE',
        help="The file (.json) to store the model's architecture in JSON")
    parser.add_argument(
        'yaml_file', metavar='YAML_FILE',
        help="The file (.yaml) to store the model's architecture in YAML")
    parser.add_argument('log_file', metavar='LOG_FILE',
                        help="The file (.csv) to store the model's epoch logs")
    parser.add_argument(
        'tensorboard_dir', metavar='TB_DIR',
        help="The directory to store the model's tensorboard data "
             "(if using Tensorflow backend)")
    parser.add_argument(
        '--arg', dest='model_args', action='append',
        help="Optional arguments to be passed to create the model")
    args = parser.parse_args(argv)

    # Configure the tensorflow session to not run out of memory
    if K.backend() == 'tensorflow':
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.95
        config.gpu_options.allocator_type = 'BFC'
        config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

    # Create the model using the optional parameters passed in
    if not args.model_args:
        args.model_args = []
    model = Model(name=args.model_name)
    model.create_from(args.create_fn, *args.model_args)
    # Temporary solution to running a model under a new name:
    # model.load_from('models/json/conv_net_large_res_5.json')
    # model.compile(loss='binary_crossentropy', optimizer='adam',
    #               metrics=['accuracy'])
    model.summary()
    _log.info('\n')

    # _log.info('Saving model to file system...')
    # model.save_to(json_file=args.json_file, yaml_file=args.yaml_file)

    _log.info('Loading model weights...')
    model.load_weights(weights_file=args.weights_file, by_name=True)
    # model.load_weights(weights_file='models/weights/danq_17.hdf5', by_name=True)
    # pop_layer(model)
    # model.layers.pop()  # Get rid of fc2 layer
    # model.outputs = [model.layers[-1].output]
    # model.output_layers = [model.layers[-1]]
    # model.layers[-1].outbound_nodes = []

    data = ModelData(batch_size=BATCH_SIZE)

    # Shrink the training dataset to TRUNCATE_TRAIN_RATIO of its original size
    # train, valid, test = data.get_data_tuples_generator(
    #     shrink_size=(TRUNCATE_TRAIN_RATIO, 1, 1),
    #     nb_samples=(TRAIN_SAMPLES, VALID_SAMPLES, TEST_SAMPLES))
    # train, valid, test = data.get_data_tuples(
    #     shrink_size=(TRUNCATE_TRAIN_RATIO, 1, 1))
    _log.info('Retrieving training data...')
    train = data.get_train_tuple(shrink_size=TRUNCATE_TRAIN_RATIO)
    _log.info('Retrieving validation data...')
    valid = data.get_valid_tuple()

    log_utils.print_date_time(_log)
    _log.info('\n')
    start = time.time()

    _log.info('Training model...')
    model.train(train=train, valid=valid, weights_file=args.weights_file,
                max_epoch=MAX_EPOCH, batch_size=BATCH_SIZE,
                nb_samples=(TRAIN_SAMPLES, VALID_SAMPLES),
                log_file=args.log_file, tensorboard_dir=args.tensorboard_dir)
    _log.info('\n')

    log_utils.print_date_time(_log)
    log_utils.print_elapsed_time(_log, start=start, end=time.time())
    _log.info('\n')

    _log.info('Retrieving testing data...')
    test = data.get_test_tuple()
    _log.info('\n')

    _log.info('Testing model...')
    model.test(test=test, nb_samples=TEST_SAMPLES)
    _log.info('\n')

    _log.info('Creating predictions...')
    model.predict(test.X_test)
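# Not part of the original excerpt: an illustrative call to the training main()
# above with an explicit argument list (handy for a quick smoke test). Every
# path, the model name, and the create-function name below are hypothetical
# placeholders; the real create function must exist in src/models/create_models.
if __name__ == '__main__':
    main(['my_model',                        # MODEL_NAME (hypothetical)
          'create_conv_net',                 # MODEL_FUNC (hypothetical)
          'models/weights/my_model.hdf5',    # WEIGHTS_FILE
          'models/json/my_model.json',       # JSON_FILE
          'models/yaml/my_model.yaml',       # YAML_FILE
          'models/logs/my_model.csv',        # LOG_FILE
          'models/tensorboard/my_model',     # TB_DIR
          '--arg', '4'])                     # optional model argument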