Example No. 1
def main(experiment_config, experiment_results_dir):

    ############################################
    #TODO: Moving towards defining most or all run parameters in separate config files
    ############################################

    domain = experiment_config.domain
    label_mapping_filepath = experiment_config['label_mappings']

    label_encoder = LabelEncoder(filepath=label_mapping_filepath)
    print(label_encoder)
    trainer = CSVTrainer(experiment_config, label_encoder=label_encoder)

    trainer.init_model_builder()

    model_filepath = os.path.join(
        trainer.model_manager.model_dir,
        trainer.model_name + '_' + domain + '_model.h5')

    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    #Get parameters for fitting and callbacks
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(weights_best=os.path.join(
        trainer.model_manager.model_dir,
        trainer.model_name + '_' + domain + '_model_weights_best.h5'),
                              logs_dir=os.path.join(experiment_results_dir,
                                                    'tensorboard_logs'),
                              restore_best_weights=True)

    history = trainer.fit(train_data,
                          steps_per_epoch=fit_params['steps_per_epoch'],
                          epochs=fit_params['epochs'],
                          validation_data=val_data,
                          validation_steps=fit_params['validation_steps'],
                          callbacks=callbacks)
    trainer.histories[domain] = history

    trainer.save_model(filepath=model_filepath)
    #######################################################################
    # TEST EVALUATION

    num_test_samples = trainer.metadata_splits['test']['num_samples']
    num_steps = num_test_samples // trainer.config['batch_size']
    test_results = [
        trainer.evaluate(test_data, steps=num_steps, log_name='test')
    ]

    trainer.test_results = test_results

    return trainer
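
A minimal invocation sketch for the function above. The paths and values are hypothetical, and the config is assumed to be a stuf-style attribute dict (the body mixes experiment_config.domain with experiment_config['label_mappings'], so both access styles must work):

if __name__ == '__main__':
    from stuf import stuf  # stuf is used for configs elsewhere in these examples

    experiment_config = stuf({
        'domain': 'source',
        'label_mappings': '/path/to/label_mappings.json',  # hypothetical path
        'batch_size': 64,
    })
    trainer = main(experiment_config, experiment_results_dir='/tmp/results')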
Example No. 2
def main(experiment_config, experiment_dir):


    trainer = BaseTrainer(experiment_config=experiment_config)
    
#     for subset, paths in trainer.tfrecord_files.items():
#         if experiment_config.verbose: print(subset)
#         for path in paths:
#             if experiment_config.verbose: print('\t',path)
#             mlflow.log_artifact(path,f'artifacts/{subset}')    

    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    
    model_params = trainer.get_model_config('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(weights_best=os.path.join(experiment_dir,'weights_best.h5'), 
                              logs_dir=os.path.join(experiment_dir,'tensorboard_logs'), 
                              restore_best_weights=False,
                              val_data=None)

    print('model_params', model_params)

    if 'vgg16' in experiment_config.model_name:
        model_builder = VGG16GrayScale(model_params)
        model = model_builder.build_model()
    elif 'resnet' in experiment_config.model_name:
        print('ResNet still in progress')
        return None
    else:
        raise ValueError(f'Unrecognized model_name: {experiment_config.model_name}')
    
    history = model.fit(train_data,
                        steps_per_epoch=fit_params['steps_per_epoch'],
                        epochs=fit_params['epochs'],
                        validation_data=val_data,
                        validation_steps=fit_params['validation_steps'],
                        callbacks=callbacks)
    
    trainer.config['model_config'] = model_params
    trainer.config.train_config['fit_params'] = fit_params
    trainer.history = history
    
    return trainer
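
get_fit_params is never defined in these examples; a plausible reconstruction, assuming it derives step counts by floor-dividing split sizes by the batch size, the same arithmetic the evaluation code in Example No. 1 uses:

def get_fit_params(metadata_splits, batch_size, num_epochs):
    # Hypothetical sketch, not pyleaves' actual implementation.
    return {
        'steps_per_epoch': metadata_splits['train']['num_samples'] // batch_size,
        'validation_steps': metadata_splits['val']['num_samples'] // batch_size,
        'epochs': num_epochs,
    }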
Example No. 3
def main(experiment_config, experiment_dir):

    ############################################
    #TODO: Moving towards defining most or all run parameters in separate config files
    ############################################

    trainer = BaseTrainer(experiment_config=experiment_config)

    #     for subset, paths in trainer.tfrecord_files.items():
    #         if experiment_config.verbose: print(subset)
    #         for path in paths:
    #             if experiment_config.verbose: print('\t',path)
    #             mlflow.log_artifact(path,f'artifacts/{subset}')

    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    #     debug=False
    #     if debug:
    #         if tf.executing_eagerly():
    #             batch_imgs, batch_labels = next(iter(val_data))
    #         else:
    #             validation_iterator = val_data.make_one_shot_iterator()
    #             val_data_next = validation_iterator.get_next()
    #             sess = tf.compat.v1.Session()
    #             batch_imgs, batch_labels = sess.run(val_data_next)

    #         from pyleaves.analysis.img_utils import plot_image_grid

    #         plot_image_grid(batch_imgs, [np.argmax(l) for l in batch_labels], 8, 8)
    #         for i in range(64):
    #             img = batch_imgs[i,...]
    #             print(i, f'min = {np.min(img):.2f}, max = {np.max(img):.2f}, mean = {np.mean(img):.2f}, std = {np.std(img):.2f}')

    #         #From [-1.0,1.0] to [0,255]
    #         uint_imgs = np.array(batch_imgs)
    #         uint_imgs += 1
    #         uint_imgs /= 2
    #         uint_imgs *= 255
    #         uint_imgs = uint_imgs.astype(np.uint8)

    #         print(f'min = {np.min(batch_imgs):.2f}, max = {np.max(batch_imgs):.2f}, mean = {np.mean(batch_imgs):.2f}, std = {np.std(batch_imgs):.2f}')
    #         print(f'min = {np.min(uint_imgs)}, max = {np.max(uint_imgs)}, mean = {np.mean(uint_imgs):.2f}, std = {np.std(uint_imgs):.2f}')

    #         plot_image_grid(uint_imgs, [np.argmax(l) for l in batch_labels], 8, 8)

    trainer.init_model_builder()

    #     model_config = trainer.get_model_config('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(weights_best=os.path.join(
        experiment_dir, 'weights_best.h5'),
                              logs_dir=os.path.join(experiment_dir,
                                                    'tensorboard_logs'),
                              restore_best_weights=False,
                              val_data=None)

    #     model_name = model_config.model_name
    #     print('model_config:\n',json.dumps(model_config,indent=4))

    #     if model_name == 'vgg16':
    #         model_builder = VGG16GrayScale(model_config)
    #         model = model_builder.build_model()

    #     elif model_name.startswith('resnet'):
    #         model_builder = ResNet(model_config)
    #         model = model_builder.build_model()

    #     else:
    #         model = build_model(**model_config)

    history = trainer.model.fit(
        train_data,
        steps_per_epoch=fit_params['steps_per_epoch'],
        epochs=fit_params['epochs'],
        validation_data=val_data,
        validation_steps=fit_params['validation_steps'],
        callbacks=callbacks)

    #     trainer.config['model_config'] = model_config
    #     trainer.config.train_config['fit_params'] = fit_params
    trainer.history = history

    return trainer
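
The commented-out debug block above maps images preprocessed into [-1.0, 1.0] back to [0, 255] for plotting. A self-contained version of that conversion (a sketch; the [-1, 1] input range is taken from the comments above):

import numpy as np

def to_uint8(batch_imgs):
    # [-1, 1] -> [0, 1] -> [0, 255], clipped for safety before the uint8 cast.
    imgs = (np.asarray(batch_imgs, dtype=np.float32) + 1.0) / 2.0 * 255.0
    return np.clip(imgs, 0, 255).astype(np.uint8)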
Example No. 4
def main(experiment_configs, experiment_results_dir):

    ############################################
    #TODO: Moving towards defining most or all run parameters in separate config files
    ############################################
    label_encoders = {}
    for i, domain in enumerate(['source', 'target']):
        label_mapping_filepath = experiment_configs[i]['label_mappings']
        label_encoders.update(
            {domain: LabelEncoder(filepath=label_mapping_filepath)})
        print(domain, len(label_encoders[domain]))

    trainer = TransferTrainer(experiment_configs,
                              trainer_constructor=CSVTrainer,
                              label_encoders=label_encoders)

    trainer.init_model_builder(domain='source')

    source_model_filepath = os.path.join(
        trainer.model_manager.model_dir,
        trainer.model_name + '_source_model.h5')
    target_model_filepath = os.path.join(
        trainer.model_manager.model_dir,
        trainer.model_name + '_target_model.h5')

    source_train_data = trainer.get_data_loader(domain='source',
                                                subset='train')
    source_val_data = trainer.get_data_loader(domain='source', subset='val')

    #Get parameters for fitting and callbacks
    fit_params = trainer.get_fit_params(domain='source')
    callbacks = get_callbacks(weights_best=os.path.join(
        trainer.model_manager.model_dir, 'source_domain_weights_best.h5'),
                              logs_dir=os.path.join(experiment_results_dir,
                                                    'tensorboard_logs'),
                              restore_best_weights=True)

    # TRAIN ON SOURCE DOMAIN

    history = trainer.fit(source_train_data,
                          steps_per_epoch=fit_params['steps_per_epoch'],
                          epochs=fit_params['epochs'],
                          validation_data=source_val_data,
                          validation_steps=fit_params['validation_steps'],
                          callbacks=callbacks,
                          history_name='source')
    trainer.histories['source'] = history

    trainer.save_model(filepath=source_model_filepath)
    #######################################################################
    # TARGET DOMAIN

    trainer.load_model(filepath=source_model_filepath)

    target_train_data = trainer.get_data_loader(domain='target',
                                                subset='train')
    target_val_data = trainer.get_data_loader(domain='target', subset='val')
    target_test_data = trainer.get_data_loader(domain='target', subset='test')

    fit_params = trainer.get_fit_params(domain='target')
    callbacks = get_callbacks(weights_best=os.path.join(
        trainer.model_manager.model_dir, 'target_domain_weights_best.h5'),
                              logs_dir=os.path.join(experiment_results_dir,
                                                    'tensorboard_logs'),
                              restore_best_weights=True)

    num_test_samples = trainer.domains['target'].metadata_splits['test']['num_samples']
    num_steps = num_test_samples // trainer.domains['target'].config['batch_size']
    test_results = []
    test_results += [
        trainer.evaluate(target_test_data,
                         steps=num_steps,
                         log_name='0-shot_test')
    ]

    # FINETUNE ON TARGET DOMAIN

    history = trainer.fit(target_train_data,
                          steps_per_epoch=fit_params['steps_per_epoch'],
                          epochs=fit_params['epochs'],
                          validation_data=target_val_data,
                          validation_steps=fit_params['validation_steps'],
                          callbacks=callbacks,
                          history_name='target')

    trainer.histories['target'] = history

    test_results += [
        trainer.evaluate(target_test_data,
                         steps=num_steps,
                         log_name='test_acc')
    ]
    trainer.test_results = test_results

    return trainer
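
get_callbacks is called throughout these examples but never shown. A hypothetical sketch of such a factory, assuming the keyword arguments seen above map onto standard Keras callbacks (the real pyleaves implementation may differ, e.g. in its EarlyStopping settings):

import tensorflow as tf

def get_callbacks(weights_best, logs_dir, restore_best_weights=False):
    return [
        tf.keras.callbacks.ModelCheckpoint(weights_best,
                                           save_best_only=True,
                                           save_weights_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=logs_dir),
        tf.keras.callbacks.EarlyStopping(patience=10,  # assumed patience
                                         restore_best_weights=restore_best_weights),
    ]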
Example No. 5
def main():
    from pprint import pprint
    import sys
    import os
    import numpy as np
    import random

    gpu = 0
    if '--gpu' in sys.argv:
        gpu = int(sys.argv[sys.argv.index('--gpu') + 1])
        print('--gpu ', gpu)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    print('os.environ["CUDA_VISIBLE_DEVICES"] : ',
          os.environ["CUDA_VISIBLE_DEVICES"])
    import tensorflow as tf
    # tf.compat.v1.enable_eager_execution()
    print('tf.executing_eagerly()', tf.executing_eagerly())

    from stuf import stuf
    from pyleaves.configs.config_v2 import BaseConfig
    import json

    try:
        config = BaseConfig().parse(args=sys.argv[1:])
    except Exception as e:
        print(e)
        print('Missing or invalid arguments')
        print('sys.argv = ', sys.argv[1:])
        sys.exit(1)

    from pyleaves.leavesdb.tf_utils.tf_utils import set_random_seed
    set_random_seed(config.seed)

    from pyleaves.base.base_data_manager import DataManager
    from pyleaves.base.base_trainer import ModelBuilder, BaseTrainer
    from pyleaves.train.callbacks import get_callbacks
    from pyleaves.loggers.mlflow_logger import MLFlowLogger as Logger

    data_manager = DataManager(config=config)
    train_data = data_manager.get_data_loader(file_group='train')
    val_data = data_manager.get_data_loader(file_group='val')
    test_data = data_manager.get_data_loader(file_group='test')

    model_builder = ModelBuilder(config)
    callbacks = get_callbacks(
        weights_best=os.path.join(config.model_config.model_dir,
                                  'weights_best.h5'),
        logs_dir=os.path.join(config.model_config.log_dir, 'tensorboard_logs'),
        val_data=val_data,
        batches_per_epoch=0,  #30,
        freq=0,  #5,
        histogram_freq=0,
        restore_best_weights=True,
        seed=config.seed)

    logger = Logger(config)

    trainer = BaseTrainer(config, model_builder, data_manager, logger,
                          callbacks)

    pprint(config)
    print('INITIATING TRAINING')
    # import pdb; pdb.set_trace()
    # Normalize class weights so the largest weight is 1.0 (weighting is currently disabled below).
    class_weights = trainer.class_weights
    class_weights = class_weights.assign(y=class_weights['y'] / np.max(class_weights['y']))
    trainer.train(class_weights=None)  # pass class_weights=class_weights to enable weighting

    trainer.save_model(config.run_id + '_model')

    trainer.test()
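
The manual sys.argv scan for --gpu above works but is brittle; an argparse equivalent is sketched below (parse_known_args leaves the remaining flags untouched for BaseConfig().parse):

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=0,
                    help='index of the CUDA device to expose')
args, remaining = parser.parse_known_args()
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)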
Example No. 6
def main(experiment_configs, experiment_dir):

    ############################################
    #TODO: Moving towards defining most or all run parameters in separate config files
    ############################################

    trainer = TransferTrainer(experiment_configs,
                              src_db=os.path.join(pyleaves.RESOURCES_DIR,
                                                  'updated_leavesdb.db'))

    trainer.init_model_builder(domain='source')

    source_model_filepath = os.path.join(
        trainer.model_manager.model_dir,
        trainer.model_name + '_source_model.h5')
    target_model_filepath = os.path.join(
        trainer.model_manager.model_dir,
        trainer.model_name + '_target_model.h5')

    source_train_data = trainer.get_data_loader(domain='source',
                                                subset='train')
    source_val_data = trainer.get_data_loader(domain='source', subset='val')

    #Get parameters for fitting and callbacks
    fit_params = trainer.get_fit_params(domain='source')
    callbacks = get_callbacks(weights_best=os.path.join(
        experiment_dir, 'source_domain_weights_best.h5'),
                              logs_dir=os.path.join(experiment_dir,
                                                    'tensorboard_logs'),
                              restore_best_weights=True)

    # TRAIN ON SOURCE DOMAIN

    history = trainer.fit(source_train_data,
                          steps_per_epoch=fit_params['steps_per_epoch'],
                          epochs=fit_params['epochs'],
                          validation_data=source_val_data,
                          validation_steps=fit_params['validation_steps'],
                          callbacks=callbacks,
                          history_name='source')
    trainer.histories['source'] = history

    trainer.save_model(filepath=source_model_filepath)
    #######################################################################
    # TARGET DOMAIN

    #trainer.load_model(filepath=source_model_filepath)

    target_train_data = trainer.get_data_loader(domain='target',
                                                subset='train')
    target_val_data = trainer.get_data_loader(domain='target', subset='val')
    target_test_data = trainer.get_data_loader(domain='target', subset='test')

    fit_params = trainer.get_fit_params(domain='target')
    callbacks = get_callbacks(weights_best=os.path.join(
        experiment_dir, 'target_domain_weights_best.h5'),
                              logs_dir=os.path.join(experiment_dir,
                                                    'tensorboard_logs'),
                              restore_best_weights=True)

    num_test_samples = trainer.domains['target'].metadata_splits['test']['num_samples']
    num_steps = num_test_samples // trainer.domains['target'].config['batch_size']
    test_results = []
    test_results += [
        trainer.evaluate(
            target_test_data,
            steps=num_steps,
            log_name='trained:[source_train],evaluate:[target_test]')
    ]

    # FINETUNE ON TARGET DOMAIN

    history = trainer.fit(target_train_data,
                          steps_per_epoch=fit_params['steps_per_epoch'],
                          epochs=fit_params['epochs'],
                          validation_data=target_val_data,
                          validation_steps=fit_params['validation_steps'],
                          callbacks=callbacks,
                          history_name='target')

    trainer.histories['target'] = history

    test_results += [
        trainer.evaluate(
            target_test_data,
            steps=num_steps,
            log_name='trained:[source_train+target_train],evaluate:[target_test]')
    ]
    trainer.test_results = test_results

    return trainer
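
The num_samples // batch_size arithmetic above floors, so any final partial batch of test samples is silently skipped. If every test sample must be evaluated, ceiling division is the usual alternative (a sketch, assuming the data loader can emit a short final batch):

import math

num_steps = math.ceil(num_test_samples / batch_size)  # covers the partial final batch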
Example No. 7
    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')  # was pointed at the train split
    test_data = trainer.get_data_loader(subset='test')

    model_params = trainer.get_model_params('train')
    fit_params = trainer.get_fit_params()

    with mlflow.start_run(run_name=(f'tfds-{args.model_name}-{args.dataset_name}'
                                    f'-lr_{args.base_learning_rate}_baseline'),
                          nested=True):
        mlflow.tensorflow.autolog()

        callbacks = get_callbacks(
            weights_best=os.path.join(experiment_dir, 'weights_best.h5'),
            logs_dir=os.path.join(experiment_dir, 'logdir'),
            restore_best_weights=False)

        print('model_params', model_params)

        # e.g. model_params = dict(name='shallow', num_classes=10000, frozen_layers=(0, -4),
        #                          input_shape=(224, 224, 3), base_learning_rate=0.0001)
        model = build_model(**model_params)

        history = model.fit(train_data,
                            steps_per_epoch=fit_params['steps_per_epoch'],
                            epochs=fit_params['epochs'],
                            validation_data=val_data,
                            validation_steps=fit_params['validation_steps'],
                            callbacks=callbacks)
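
mlflow.tensorflow.autolog() captures fit metrics automatically; explicitly recording the derived parameters on the run is a common complement (a sketch, to be placed inside the mlflow.start_run block above):

mlflow.log_params(fit_params)    # steps_per_epoch, epochs, validation_steps
mlflow.log_params(model_params)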
Example No. 8
def main(dataset_name='PNAS',
         model_name='vgg16',
         experiment_dir=r'/media/data/jacob/Fossil_Project/vgg16/PNAS',
         gpu_ids=[0],
         tfrecord_root_dir=r'/media/data/jacob/Fossil_Project/tfrecord_data',
         batch_size=64,
         target_size=(224, 224),
         base_learning_rate=0.001,
         num_epochs=100,
         preprocessing='imagenet',
         augment_images=False):

    reset_eager_session()
    #     tf.reset_default_graph()
    dataset_config = DatasetConfig(dataset_name=dataset_name,
                                   label_col='family',
                                   target_size=target_size,
                                   channels=3,
                                   low_class_count_thresh=3,
                                   data_splits={
                                       'val_size': 0.2,
                                       'test_size': 0.2
                                   },
                                   tfrecord_root_dir=tfrecord_root_dir,
                                   num_shards=10)

    train_config = TrainConfig(model_name=model_name,
                               batch_size=batch_size,
                               frozen_layers=(0, -4),
                               base_learning_rate=base_learning_rate,
                               buffer_size=1000,
                               num_epochs=num_epochs,
                               preprocessing=preprocessing,
                               augment_images=augment_images,
                               seed=3)

    experiment_config = ExperimentConfig(dataset_config=dataset_config,
                                         train_config=train_config)

    ############################################
    #TODO: Move config definitions outside main() for:
    #    1. simplifying overall logic in main & segregating configuration to section marked by if __name__=='__main__'
    #    2. Moving towards defining most or all run parameters in separate config files
    ############################################

    def build_tunable_model(hp):

        return build_model(name=hp.Choice('name',
                                          values=[
                                              'shallow', 'vgg16', 'xception',
                                              'resnet_50_v2', 'resnet_101_v2'
                                          ]),
                           num_classes=10000,
                           frozen_layers=(0, -4),
                           input_shape=(224, 224, 3),
                           base_learning_rate=hp.Float('base_learning_rate',
                                                       min_value=1e-6,
                                                       max_value=0.01,
                                                       sampling='log'))

    tuner = RandomSearch(build_tunable_model,
                         objective='val_accuracy',
                         max_trials=num_epochs,
                         seed=train_config.seed,  # the seed set in TrainConfig above
                         hyperparameters=None,
                         tune_new_entries=True,
                         allow_new_entries=True)

    trainer = BaseTrainer(experiment_config=experiment_config)

    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    #     AUTOTUNE = tf.data.experimental.AUTOTUNE
    #     train_data = tfds.load("mnist", split='train').shuffle(1000).batch(batch_size).repeat().prefetch(AUTOTUNE)

    model_params = trainer.get_model_params('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(weights_best=os.path.join(
        experiment_dir, 'weights_best.h5'),
                              logs_dir=os.path.join(experiment_dir, 'logdir'),
                              restore_best_weights=False)

    # e.g. model_params = dict(name='shallow', num_classes=10000, frozen_layers=(0, -4),
    #                          input_shape=(224, 224, 3), base_learning_rate=0.0001)
    model = build_model(**model_params)

    history = model.fit(train_data,
                        steps_per_epoch=fit_params['steps_per_epoch'],
                        epochs=fit_params['epochs'],
                        validation_data=val_data,
                        validation_steps=fit_params['validation_steps'],
                        callbacks=callbacks)
    return history
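
The RandomSearch tuner above is constructed but its search is never launched; with the standard Keras Tuner API it would typically be run from inside main() once the data loaders exist (a sketch):

tuner.search(train_data,
             steps_per_epoch=fit_params['steps_per_epoch'],
             epochs=fit_params['epochs'],
             validation_data=val_data,
             validation_steps=fit_params['validation_steps'],
             callbacks=callbacks)
best_model = tuner.get_best_models(num_models=1)[0]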