f'lr-{args.base_learning_rate}-bsz_{args.batch_size}',
                current_time)

            #             run_records = gather_run_data(args.experiment_root_dir, run=args.run_name, return_type='records')
            #             source_run_record = [rec for rec in run_records if rec['domain']=='source'][0]
            #             target_run_record = [rec for rec in run_records if rec['domain']=='target'][0]

            dataset_config_source_domain = DatasetConfig(
                experiment_name=experiment_name,
                experiment_root_dir=args.experiment_root_dir,
                dataset_name=args.source_datasets,
                label_col='family',
                #                                             target_size=target_size,
                #                                             num_channels=num_channels,
                grayscale=(args.color_type == 'grayscale'),
                color_type=args.color_type,
                low_class_count_thresh=args.low_class_count_thresh,
                data_splits={
                    'val_size': 0.2,
                    'test_size': 0.0
                },
                tfrecord_root_dir=args.tfrecord_dir,
                #                                             data_db_path=args.data_db_path,
                num_shards=10)

            dataset_config_target_domain = DatasetConfig(
                experiment_name=experiment_name,
                experiment_root_dir=args.experiment_root_dir,
                dataset_name=args.target_datasets,
                label_col='family',
                #                                             target_size=target_size,
Example #2
                "%Y-%m-%d_%H-%M-%S")

            experiment_dir = os.path.join(
                r'/media/data/jacob/Fossil_Project', 'experiments',
                args.model_name, args.dataset_name, args.color_type,
                f'lr-{args.base_learning_rate}-bsz_{args.batch_size}',
                current_time)
            reset_eager_session()

            dataset_config = DatasetConfig(
                dataset_name=args.dataset_name,
                label_col='family',
                #                                             target_size=target_size,
                #                                             num_channels=num_channels,
                grayscale=(args.color_type == 'grayscale'),
                low_class_count_thresh=args.low_class_count_thresh,
                data_splits={
                    'val_size': 0.2,
                    'test_size': 0.2
                },
                tfrecord_root_dir=args.tfrecord_dir,
                data_db_path=args.data_db_path,
                num_shards=10)

            train_config = TrainConfig(
                model_name=args.model_name,
                model_dir=args.model_dir,
                batch_size=args.batch_size,
                frozen_layers=None,  #(0,-4),
                base_learning_rate=args.base_learning_rate,
                buffer_size=500,
                num_epochs=args.num_epochs,
Example #3
#     '''
#     def __init__(self, *args, **kwargs):

# class KerasTrainer(BaseTrain):

#     def __init__(self, experiment_config)

if __name__ == '__main__':

    dataset_config = DatasetConfig(
        dataset_name='PNAS',
        label_col='family',
        target_size=(224, 224),
        channels=3,
        low_class_count_thresh=3,
        data_splits={
            'val_size': 0.2,
            'test_size': 0.2
        },
        tfrecord_root_dir=r'/media/data/jacob/Fossil_Project/tfrecord_data',
        num_shards=10)

    train_config = TrainConfig(model_name='vgg16',
                               batch_size=64,
                               frozen_layers=(0, -4),
                               base_learning_rate=1e-4,
                               buffer_size=1000,
                               num_epochs=100,
                               preprocessing='imagenet',
                               augment_images=True,
                               seed=3)
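
    # A minimal sketch of the assumed next step, mirroring the pattern in
    # Example #7: combine the two configs and hand them to a trainer.
    #     experiment_config = ExperimentConfig(dataset_config=dataset_config,
    #                                          train_config=train_config)
    #     trainer = BaseTrainer(experiment_config=experiment_config)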
Example #4
                        target_size=(224,224)

                    histories = []

                    with mlflow.start_run(
                            run_name=(f'{args.model_name}-{args.dataset_name}-{color_type}-'
                                      f'lr_{args.base_learning_rate}-bsz_{args.batch_size}'),
                            nested=True):
                        current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

                        experiment_dir = os.path.join(
                            r'/media/data/jacob/Fossil_Project', 'experiments',
                            args.model_name, args.dataset_name, color_type,
                            f'lr-{args.base_learning_rate}-bsz_{args.batch_size}',
                            current_time)

                        reset_eager_session()

                        dataset_config = DatasetConfig(dataset_name=args.dataset_name,
                                                       label_col='family',
                                                       target_size=target_size,
                                                       num_channels=1,
                                                       grayscale=True,
                                                       low_class_count_thresh=args.low_class_count_thresh,
                                                       data_splits={'val_size':0.2,'test_size':0.2},
                                                       tfrecord_root_dir=args.tfrecord_dir,
                                                       num_shards=10)

                        train_config = TrainConfig(model_name=args.model_name,
                                                   batch_size=args.batch_size,
                                                   frozen_layers=(0,-4),
                                                   base_learning_rate=args.base_learning_rate,
                                                   buffer_size=500,
                                                   num_epochs=args.num_epochs,
                                                   preprocessing=True,
                                                   augment_images=True,
                                                   augmentations=['rotate','flip'],
                                                   regularization={'l2':0.001},
Example #5
    ]

    print('model_name=', args.model_name)
    if args.model_name in ['vgg16', 'resnet_50_v2', 'resnet_101_v2']:
        target_size = (224, 224)
    elif args.model_name == 'xception':
        target_size = (299, 299)
    else:
        target_size = (224, 224)
    print('target_size=', target_size)
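    # Equivalent, more compact mapping (a stylistic alternative, not a change):
    #     target_size = {'xception': (299, 299)}.get(args.model_name, (224, 224))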
    dataset_config = DatasetConfig(dataset_name=args.dataset_name,
                                   label_col='family',
                                   target_size=target_size,
                                   num_channels=args.num_channels,
                                   low_class_count_thresh=3,
                                   data_splits={
                                       'val_size': 0.2,
                                       'test_size': 0.2
                                   },
                                   num_shards=10,
                                   input_format=dict)

    train_config = TrainConfig(model_name=args.model_name,
                               batch_size=args.batch_size,
                               frozen_layers=(0, -4),
                               base_learning_rate=args.base_learning_rate,
                               buffer_size=500,
                               num_epochs=args.num_epochs,
                               preprocessing=True,
                               augment_images=True,
                               augmentations=['rotate', 'flip'],
Example #6
            #                                                          experiment_root_dir=args.experiment_root_dir,
            #                                                          tfrecord_root_dir=args.tfrecord_dir,
            #                                                          low_class_count_thresh=10,
            #                                                          data_configs={
            #                                                                       args.domain: domain_config_0
            #                                                                      })
            #             dataset_config_domain.init_config_file()

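            # run_records is assumed to be built in the elided portion of this
            # snippet (cf. the commented-out gather_run_data call in Example #1);
            # unpacking run_records[0] supplies per-run fields such as dataset_name.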
            dataset_config = DatasetConfig(
                experiment_name=experiment_name,
                **run_records[0],
                experiment_root_dir=args.experiment_root_dir,
                label_col='family',
                #                                             target_size=target_size,
                #                                             num_channels=num_channels,
                grayscale=(args.color_type == 'grayscale'),
                color_type=args.color_type,
                low_class_count_thresh=args.low_class_count_thresh,
                data_splits={
                    'val_size': 0.0,
                    'test_size': 0.5
                },
                tfrecord_root_dir=args.tfrecord_dir,
                num_shards=10)

            train_config = TrainConfig(
                model_name=args.model_name,
                model_dir=args.model_dir,
                batch_size=args.batch_size,
                frozen_layers=None,
                base_learning_rate=args.base_learning_rate,
                buffer_size=500,
Example #7
def main(dataset_name='PNAS',
         model_name='vgg16',
         experiment_dir=r'/media/data/jacob/Fossil_Project/vgg16/PNAS',
         gpu_ids=[0],
         tfrecord_root_dir=r'/media/data/jacob/Fossil_Project/tfrecord_data',
         batch_size=64,
         target_size=(224, 224),
         base_learning_rate=0.001,
         num_epochs=100,
         preprocessing='imagenet',
         augment_images=False):

    reset_eager_session()
    #     tf.reset_default_graph()
    dataset_config = DatasetConfig(dataset_name=dataset_name,
                                   label_col='family',
                                   target_size=target_size,
                                   channels=3,
                                   low_class_count_thresh=3,
                                   data_splits={
                                       'val_size': 0.2,
                                       'test_size': 0.2
                                   },
                                   tfrecord_root_dir=tfrecord_root_dir,
                                   num_shards=10)

    train_config = TrainConfig(model_name=model_name,
                               batch_size=batch_size,
                               frozen_layers=(0, -4),
                               base_learning_rate=base_learning_rate,
                               buffer_size=1000,
                               num_epochs=num_epochs,
                               preprocessing=preprocessing,
                               augment_images=augment_images,
                               seed=3)

    experiment_config = ExperimentConfig(dataset_config=dataset_config,
                                         train_config=train_config)

    ############################################
    # TODO: Move config definitions outside main() in order to:
    #    1. Simplify the overall logic in main() and segregate configuration
    #       into the section guarded by if __name__ == '__main__'
    #    2. Move towards defining most or all run parameters in separate config files
    ############################################
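
    # A hypothetical sketch of that refactor; from_file is an assumed helper,
    # not an existing constructor:
    #     experiment_config = ExperimentConfig.from_file('configs/experiment.yaml')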

    def build_tunable_model(hp):

        return build_model(name=hp.Choice('name',
                                          values=[
                                              'shallow', 'vgg16', 'xception',
                                              'resnet_50_v2', 'resnet_101_v2'
                                          ]),
                           num_classes=10000,
                           frozen_layers=(0, -4),
                           input_shape=(224, 224, 3),
                           base_learning_rate=hp.Float('base_learning_rate',
                                                       min_value=1e-6,
                                                       max_value=0.01,
                                                       sampling='log'))

    # Keep a handle on the tuner so the search can be invoked.
    tuner = RandomSearch(build_tunable_model,
                         objective='val_accuracy',
                         max_trials=num_epochs,
                         seed=3,  # reuse the fixed seed from train_config
                         hyperparameters=None,
                         tune_new_entries=True,
                         allow_new_entries=True)
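
    # Hypothetical continuation (an assumption, not from the original snippet):
    # with keras-tuner, search() takes the same arguments as model.fit, e.g.
    #     tuner.search(train_data, epochs=num_epochs, validation_data=val_data)
    #     best_model = tuner.get_best_models(num_models=1)[0]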

    trainer = BaseTrainer(experiment_config=experiment_config)

    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    #     AUTOTUNE = tf.data.experimental.AUTOTUNE
    #     train_data = tfds.load("mnist", split='train').shuffle(1000).batch(batch_size).repeat().prefetch(AUTOTUNE)

    model_params = trainer.get_model_params('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir, 'weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'logdir'),
        restore_best_weights=False)

    model = build_model(**model_params)
    # e.g. name='shallow', num_classes=10000, frozen_layers=(0, -4),
    #      input_shape=(224, 224, 3), base_learning_rate=0.0001

    history = model.fit(train_data,
                        steps_per_epoch=fit_params['steps_per_epoch'],
                        epochs=fit_params['epochs'],
                        validation_data=val_data,
                        validation_steps=fit_params['validation_steps'],
                        callbacks=callbacks)
    return history
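
if __name__ == '__main__':
    # Hypothetical entry point, mirroring the __main__ guard in Example #3;
    # main() runs with the defaults declared in its signature.
    main()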