Example #1
    def test_generator_wraparound(self):
        data = ModelData()
        nb_samples = 3 * BATCH_SIZE
        train, valid, test = data.get_data_tuples_generator(
            nb_samples=(nb_samples, nb_samples, nb_samples))

        # First batch
        train_batch_1 = next(train)
        valid_batch_1 = next(valid)
        test_batch_1 = next(test)

        # Second and third batches
        for _ in range(2):
            self.check_generator_length(train=train, valid=valid, test=test)

        # Fourth batch should wrap back to first batch
        train_batch_4 = next(train)
        valid_batch_4 = next(valid)
        test_batch_4 = next(test)
        self.assertTrue(
            np.array_equal(train_batch_1.X_train, train_batch_4.X_train))
        self.assertTrue(
            np.array_equal(train_batch_1.y_train, train_batch_4.y_train))
        self.assertTrue(
            np.array_equal(valid_batch_1.X_valid, valid_batch_4.X_valid))
        self.assertTrue(
            np.array_equal(valid_batch_1.y_valid, valid_batch_4.y_valid))
        self.assertTrue(
            np.array_equal(test_batch_1.X_test, test_batch_4.X_test))
        self.assertTrue(
            np.array_equal(test_batch_1.y_test, test_batch_4.y_test))
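The wraparound behaviour asserted above is what a cyclic batch generator provides: after serving the last batch it starts over from the first. A minimal sketch under that assumption, using plain numpy arrays (the helper cyclic_batches and the toy data are illustrative, not part of ModelData):

import numpy as np

def cyclic_batches(X, y, batch_size):
    # Yield (X_batch, y_batch) pairs forever, restarting from the first
    # batch once every sample has been served.
    nb_batches = len(X) // batch_size
    while True:
        for i in range(nb_batches):
            start = i * batch_size
            yield X[start:start + batch_size], y[start:start + batch_size]

# Three batches of two samples each: the fourth next() call wraps back to
# the first batch, mirroring what test_generator_wraparound checks.
gen = cyclic_batches(np.arange(12).reshape(6, 2), np.arange(6), batch_size=2)
first_X, first_y = next(gen)
for _ in range(2):
    next(gen)
wrapped_X, wrapped_y = next(gen)
assert np.array_equal(first_X, wrapped_X)
assert np.array_equal(first_y, wrapped_y)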
Example #2
    def test_generator_full(self):
        data = ModelData()
        train, valid, test = data.get_data_tuples_generator()

        # First batch
        train_batch_1 = next(train)
        valid_batch_1 = next(valid)
        test_batch_1 = next(test)

        # All batches until end
        self.check_train_generator_length(train)
        self.check_valid_generator_length(valid)
        self.check_test_generator_length(test)

        # Next batch should wrap back to first batch
        train_batch_begin = next(train)
        valid_batch_begin = next(valid)
        test_batch_begin = next(test)
        self.assertTrue(
            np.array_equal(train_batch_1.X_train, train_batch_begin.X_train))
        self.assertTrue(
            np.array_equal(train_batch_1.y_train, train_batch_begin.y_train))
        self.assertTrue(
            np.array_equal(valid_batch_1.X_valid, valid_batch_begin.X_valid))
        self.assertTrue(
            np.array_equal(valid_batch_1.y_valid, valid_batch_begin.y_valid))
        self.assertTrue(
            np.array_equal(test_batch_1.X_test, test_batch_begin.X_test))
        self.assertTrue(
            np.array_equal(test_batch_1.y_test, test_batch_begin.y_test))
Example #3
    def test_shrink_size(self):
        data = ModelData()
        data.set_shrink_size(train=TRAIN_SHRINK,
                             valid=VALID_SHRINK,
                             test=TEST_SHRINK)
        train = data.get_train_tuple()
        self.assertEqual(train.X_train.shape,
                         (TRAIN_SHRINK * TRAIN_SAMPLES, LENGTH, WIDTH))
Example #4
    def test_data_tuples(self):
        data = ModelData()
        train, valid, test = data.get_data_tuples()
        self.assertEqual(train.X_train.shape, (TRAIN_SAMPLES, LENGTH, WIDTH))
        self.assertEqual(train.y_train.shape, (TRAIN_SAMPLES, CLASSES))
        self.assertEqual(valid.X_valid.shape, (VALID_SAMPLES, LENGTH, WIDTH))
        self.assertEqual(valid.y_valid.shape, (VALID_SAMPLES, CLASSES))
        self.assertEqual(test.X_test.shape, (TEST_SAMPLES, LENGTH, WIDTH))
        self.assertEqual(test.y_test.shape, (TEST_SAMPLES, CLASSES))
Example #5
    def test_individual_tuples(self):
        data = ModelData()
        train = data.get_train_tuple()
        valid = data.get_valid_tuple()
        test = data.get_test_tuple()
        self.assertEqual(train.X_train.shape, (TRAIN_SAMPLES, LENGTH, WIDTH))
        self.assertEqual(train.y_train.shape, (TRAIN_SAMPLES, CLASSES))
        self.assertEqual(valid.X_valid.shape, (VALID_SAMPLES, LENGTH, WIDTH))
        self.assertEqual(valid.y_valid.shape, (VALID_SAMPLES, CLASSES))
        self.assertEqual(test.X_test.shape, (TEST_SAMPLES, LENGTH, WIDTH))
        self.assertEqual(test.y_test.shape, (TEST_SAMPLES, CLASSES))
Example #6
    def test_setters_getters(self):
        data = ModelData()
        data.set_shrink_size(train=TRAIN_SHRINK,
                             valid=VALID_SHRINK,
                             test=TEST_SHRINK)
        self.assertEqual(data.nb_train_samples(),
                         floor(TRAIN_SHRINK * TRAIN_SAMPLES))
        self.assertEqual(data.nb_valid_samples(),
                         floor(VALID_SHRINK * VALID_SAMPLES))
        self.assertEqual(data.nb_test_samples(),
                         floor(TEST_SHRINK * TEST_SAMPLES))
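The assertions above pin down the shrink contract: each nb_*_samples() counter reports the floor of the shrink ratio times the full sample count, so partial samples are dropped. A tiny illustration with made-up numbers:

from math import floor

# A shrink ratio of 0.3 applied to 7 available samples keeps
# floor(0.3 * 7) == 2 samples; the fractional remainder is discarded.
print(floor(0.3 * 7))  # 2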
Example #7
    def test_open_file(self):
        data = ModelData()
        data.set_shrink_size(train=1, valid=1, test=1)
        train = data.open_train_file()
        valid = data.open_valid_file()
        test = data.open_test_file()
        train_ = h5py.File('data/processed/train.mat', 'r')
        valid_ = scipy.io.loadmat('data/processed/valid.mat')
        test_ = scipy.io.loadmat('data/processed/test.mat')
        self.assertEqual(train.keys(), train_.keys())
        self.assertEqual(valid.keys(), valid_.keys())
        self.assertEqual(test.keys(), test_.keys())
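The test above opens train.mat with h5py but valid.mat and test.mat with scipy.io.loadmat, which suggests train.mat was written in MATLAB's -v7.3 (HDF5) format; scipy.io.loadmat only reads the v7.2-and-earlier format and raises NotImplementedError on v7.3 files. A small sketch of a loader that falls back automatically (load_mat is an illustrative helper, not part of ModelData):

import h5py
import scipy.io

def load_mat(path):
    # loadmat handles .mat files saved as -v7.2 or earlier; for -v7.3
    # (HDF5) files it raises NotImplementedError, so fall back to h5py.
    try:
        return scipy.io.loadmat(path)
    except NotImplementedError:
        return h5py.File(path, 'r')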
Example #8
def main(argv):
    # Set up argument parsing
    parser = argparse.ArgumentParser(
        description='Test a Keras model on genetic sequences ' +
        'to derive epigenetic mechanisms')

    parser.add_argument('model_name',
                        metavar='MODEL_NAME',
                        help="The unique name of the model to create")
    # parser.add_argument('create_fn', metavar='MODEL_FUNC', help="The name of the function in src/models/create_models to create a model with")
    parser.add_argument('weights_file',
                        metavar='WEIGHTS_FILE',
                        help="The file (.hdf5) to store the model's weights")
    parser.add_argument(
        'json_file',
        metavar='JSON_FILE',
        help="The file (.json) to store the model's architecture in JSON")
    parser.add_argument(
        'yaml_file',
        metavar='YAML_FILE',
        help="The file (.yaml) to store the model's architecture in YAML")
    parser.add_argument('log_file',
                        metavar='LOG_FILE',
                        help="The file (.csv) to store the model's epoch logs")
    parser.add_argument(
        'tensorboard_dir',
        metavar='TB_DIR',
        help="The directory to store the model's tensorboard data "
             "(if using Tensorflow backend)")
    args = parser.parse_args(argv)

    # Configure the tensorflow session to not run out of memory
    if K.backend() == 'tensorflow':
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.85
        config.gpu_options.allocator_type = 'BFC'
        config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

    data = ModelData()

    model = Model(name=args.model_name)
    _log.info('Loading model architecture from ' + args.json_file)
    # Temporary solution to running a model under a new name
    model.load_from(json_file=args.json_file, yaml_file=args.yaml_file)
    model.load_weights(weights_file=args.weights_file)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    _log.info('Retrieving test data...')
    test = data.get_test_tuple()

    log_utils.print_date_time(_log)
    start = time.time()

    _log.info('Testing model...')
    model.test(test=test)
    log_utils.print_elapsed_time(_log, start=start, end=time.time())

    _log.info('Creating predictions...')
    y_predict = model.predict(test.X_test)
    log_utils.print_elapsed_time(_log, start=start, end=time.time())

    predictions = {'predictions': np.array(y_predict)}
    scipy.io.savemat(
        'models/predictions/y_predict_' + args.model_name + '.mat',
        predictions)

    log_utils.print_date_time(_log)
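The predictions written above with scipy.io.savemat can be read back with scipy.io.loadmat; a minimal sketch (the model name here is a placeholder for whatever MODEL_NAME was passed on the command line):

import scipy.io

model_name = 'example_model'  # placeholder for the MODEL_NAME argument
mat = scipy.io.loadmat('models/predictions/y_predict_' + model_name + '.mat')
y_predict = mat['predictions']
print(y_predict.shape)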
Example #9
def main(argv):

    # Set up argument parsing
    parser = argparse.ArgumentParser(
        description='Run a Keras model on genetic sequences ' +
        'to derive epigenetic mechanisms')

    parser.add_argument('model_name',
                        metavar='MODEL_NAME',
                        help="The unique name of the model to create")
    parser.add_argument(
        'create_fn',
        metavar='MODEL_FUNC',
        help="The name of the function in src/models/create_models "
             "to create a model with")
    parser.add_argument('weights_file',
                        metavar='WEIGHTS_FILE',
                        help="The file (.hdf5) to store the model's weights")
    parser.add_argument(
        'json_file',
        metavar='JSON_FILE',
        help="The file (.json) to store the model's architecture in JSON")
    parser.add_argument(
        'yaml_file',
        metavar='YAML_FILE',
        help="The file (.yaml) to store the model's architecture in YAML")
    parser.add_argument('log_file',
                        metavar='LOG_FILE',
                        help="The file (.csv) to store the model's epoch logs")
    parser.add_argument(
        'tensorboard_dir',
        metavar='TB_DIR',
        help="The directory to store the model's tensorboard data "
             "(if using Tensorflow backend)")
    parser.add_argument(
        '--arg',
        dest='model_args',
        action='append',
        help="Optional arguments to be passed to create the model")
    args = parser.parse_args(argv)

    # Configure the tensorflow session to not run out of memory
    if K.backend() == 'tensorflow':
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.95
        config.gpu_options.allocator_type = 'BFC'
        config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

    # Create the model using the optional parameters passed in
    if not args.model_args:
        args.model_args = []

    model = Model(name=args.model_name)
    model.create_from(args.create_fn, *args.model_args)

    # model.load_from('models/json/conv_net_large_res_5.json') # Temporary solution to running a model under a new name
    # model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.summary()

    _log.info('\n')
    # _log.info('Saving model to file system...')
    # model.save_to(json_file=args.json_file, yaml_file=args.yaml_file)

    _log.info('Loading model weights...')
    model.load_weights(weights_file=args.weights_file, by_name=True)
    # model.load_weights(weights_file='models/weights/danq_17.hdf5', by_name=True)
    # pop_layer(model)
    # model.layers.pop(); # Get rid of fc2 layer
    # model.outputs = [model.layers[-1].output]
    # model.output_layers = [model.layers[-1]]
    # model.layers[-1].outbound_notes = []

    data = ModelData(batch_size=BATCH_SIZE)
    # Shrink the training dataset to half of its original size
    # train, valid, test = data.get_data_tuples_generator(shrink_size=(TRUNCATE_TRAIN_RATIO, 1, 1),
    #                                                     nb_samples=(TRAIN_SAMPLES, VALID_SAMPLES, TEST_SAMPLES))
    # train, valid, test = data.get_data_tuples(shrink_size=(TRUNCATE_TRAIN_RATIO, 1, 1))
    _log.info('Retrieving training data...')
    train = data.get_train_tuple(shrink_size=TRUNCATE_TRAIN_RATIO)

    _log.info('Retrieving validation data...')
    valid = data.get_valid_tuple()

    log_utils.print_date_time(_log)
    _log.info('\n')
    start = time.time()

    _log.info('Training model...')
    model.train(train=train,
                valid=valid,
                weights_file=args.weights_file,
                max_epoch=MAX_EPOCH,
                batch_size=BATCH_SIZE,
                nb_samples=(TRAIN_SAMPLES, VALID_SAMPLES),
                log_file=args.log_file,
                tensorboard_dir=args.tensorboard_dir)

    _log.info('\n')
    log_utils.print_date_time(_log)
    log_utils.print_elapsed_time(_log, start=start, end=time.time())
    _log.info('\n')

    _log.info('Retrieving testing data...')
    test = data.get_test_tuple()

    _log.info('\n')
    _log.info('Testing model...')
    model.test(test=test, nb_samples=TEST_SAMPLES)

    _log.info('\n')
    _log.info('Creating predictions...')
    model.predict(test.X_test)