# NOTE(review): whitespace-mangled fragment — this physical line begins
# mid-expression (the tail of an os.path.join(...) call building an
# experiment directory path) and ends mid-call (the target-domain
# DatasetConfig is cut off after label_col).  It appears to build two
# DatasetConfig objects (source and target domains) for a domain-transfer
# run — TODO confirm against the original file and restore the line breaks
# before making any functional edit.
f'lr-{args.base_learning_rate}-bsz_{args.batch_size}', current_time) # run_records = gather_run_data(args.experiment_root_dir, run=args.run_name, return_type='records') # source_run_record = [rec for rec in run_records if rec['domain']=='source'][0] # target_run_record = [rec for rec in run_records if rec['domain']=='target'][0] dataset_config_source_domain = DatasetConfig( experiment_name=experiment_name, experiment_root_dir=args.experiment_root_dir, dataset_name=args.source_datasets, label_col='family', # target_size=target_size, # num_channels=num_channels, grayscale=(args.color_type == 'grayscale'), color_type=args.color_type, low_class_count_thresh=args.low_class_count_thresh, data_splits={ 'val_size': 0.2, 'test_size': 0.0 }, tfrecord_root_dir=args.tfrecord_dir, # data_db_path=args.data_db_path, num_shards=10) dataset_config_target_domain = DatasetConfig( experiment_name=experiment_name, experiment_root_dir=args.experiment_root_dir, dataset_name=args.target_datasets, label_col='family', # target_size=target_size,
# NOTE(review): whitespace-mangled fragment — begins mid-call (the closing
# of a datetime strftime("%Y-%m-%d_%H-%M-%S") presumably producing
# current_time) and ends mid-call (TrainConfig is cut off after num_epochs).
# Configures a single-domain run: experiment_dir from args, then a
# DatasetConfig (80/20/20-style splits via val_size/test_size=0.2) and a
# TrainConfig with no frozen layers.  Restore the original line breaks
# before editing.
"%Y-%m-%d_%H-%M-%S") experiment_dir = os.path.join( r'/media/data/jacob/Fossil_Project', 'experiments', args.model_name, args.dataset_name, args.color_type, f'lr-{args.base_learning_rate}-bsz_{args.batch_size}', current_time) reset_eager_session() dataset_config = DatasetConfig( dataset_name=args.dataset_name, label_col='family', # target_size=target_size, # num_channels=num_channels, grayscale=(args.color_type == 'grayscale'), low_class_count_thresh=args.low_class_count_thresh, data_splits={ 'val_size': 0.2, 'test_size': 0.2 }, tfrecord_root_dir=args.tfrecord_dir, data_db_path=args.data_db_path, num_shards=10) train_config = TrainConfig( model_name=args.model_name, model_dir=args.model_dir, batch_size=args.batch_size, frozen_layers=None, #(0,-4), base_learning_rate=args.base_learning_rate, buffer_size=500, num_epochs=args.num_epochs,
# '''
# def __init__(self, *args, **kwargs):
# class KerasTrainer(BaseTrain):
#     def __init__(self, experiment_config)

if __name__ == '__main__':
    # Default configuration used when this module is executed directly:
    # the PNAS dataset trained with an ImageNet-preprocessed VGG16 whose
    # first and last-four layers are frozen.
    dataset_config = DatasetConfig(
        dataset_name='PNAS',
        label_col='family',
        target_size=(224, 224),
        channels=3,
        low_class_count_thresh=3,
        data_splits={'val_size': 0.2, 'test_size': 0.2},
        tfrecord_root_dir=r'/media/data/jacob/Fossil_Project/tfrecord_data',
        num_shards=10)
    train_config = TrainConfig(
        model_name='vgg16',
        batch_size=64,
        frozen_layers=(0, -4),
        base_learning_rate=1e-4,
        buffer_size=1000,
        num_epochs=100,
        preprocessing='imagenet',
        augment_images=True,
        seed=3)
# NOTE(review): whitespace-mangled fragment — sets up a nested mlflow run
# for a grayscale (num_channels=1) experiment, then builds DatasetConfig and
# TrainConfig; the line ends mid-call (TrainConfig is cut off after
# regularization={'l2': 0.001}).  The statements after mlflow.start_run(...)
# presumably belong inside the `with` block — TODO confirm the original
# indentation before editing; do not modify in this collapsed form.
target_size=(224,224) histories = [] with mlflow.start_run(run_name=f'{args.model_name}-{args.dataset_name}-{color_type}-lr_{args.base_learning_rate}-bsz_{args.batch_size}', nested=True): current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") experiment_dir = os.path.join(r'/media/data/jacob/Fossil_Project','experiments',args.model_name,args.dataset_name,color_type,f'lr-{args.base_learning_rate}-bsz_{args.batch_size}',current_time) reset_eager_session() dataset_config = DatasetConfig(dataset_name=args.dataset_name, label_col='family', target_size=target_size, num_channels=1, grayscale=True, low_class_count_thresh=args.low_class_count_thresh, data_splits={'val_size':0.2,'test_size':0.2}, tfrecord_root_dir=args.tfrecord_dir, num_shards=10) train_config = TrainConfig(model_name=args.model_name, batch_size=args.batch_size, frozen_layers=(0,-4), base_learning_rate=args.base_learning_rate, buffer_size=500, num_epochs=args.num_epochs, preprocessing=True, augment_images=True, augmentations=['rotate','flip'], regularization={'l2':0.001},
# NOTE(review): whitespace-mangled fragment — begins with an orphan `]`
# closing a list that starts before this view, and ends mid-call
# (TrainConfig cut off after augmentations).  Picks the input resolution by
# architecture: 299x299 for Xception, 224x224 for everything else (the
# if/elif/else collapses to two outcomes; the vgg16/resnet branch is
# redundant with the else).  Restore line breaks before editing.
] print('model_name=', args.model_name) if args.model_name in ['vgg16', 'resnet_50_v2', 'resnet_101_v2']: target_size = (224, 224) elif args.model_name == 'xception': target_size = (299, 299) else: target_size = (224, 224) print('target_size=', target_size) dataset_config = DatasetConfig(dataset_name=args.dataset_name, label_col='family', target_size=target_size, num_channels=args.num_channels, low_class_count_thresh=3, data_splits={ 'val_size': 0.2, 'test_size': 0.2 }, num_shards=10, input_format=dict) train_config = TrainConfig(model_name=args.model_name, batch_size=args.batch_size, frozen_layers=(0, -4), base_learning_rate=args.base_learning_rate, buffer_size=500, num_epochs=args.num_epochs, preprocessing=True, augment_images=True, augmentations=['rotate', 'flip'],
# NOTE(review): whitespace-mangled fragment — begins inside a commented-out
# config block and ends mid-call (TrainConfig cut off after buffer_size).
# Builds a DatasetConfig seeded from run_records[0] (defined before this
# view — presumably the output of gather_run_data; TODO confirm) with an
# evaluation-style split (val_size=0.0, test_size=0.5).  Restore the
# original line breaks before editing.
# experiment_root_dir=args.experiment_root_dir, # tfrecord_root_dir=args.tfrecord_dir, # low_class_count_thresh=10, # data_configs={ # args.domain: domain_config_0 # }) # dataset_config_domain.init_config_file() dataset_config = DatasetConfig( experiment_name=experiment_name, **run_records[0], experiment_root_dir=args.experiment_root_dir, label_col='family', # target_size=target_size, # num_channels=num_channels, grayscale=(args.color_type == 'grayscale'), color_type=args.color_type, low_class_count_thresh=args.low_class_count_thresh, data_splits={ 'val_size': 0.0, 'test_size': 0.5 }, tfrecord_root_dir=args.tfrecord_dir, num_shards=10) train_config = TrainConfig( model_name=args.model_name, model_dir=args.model_dir, batch_size=args.batch_size, frozen_layers=None, base_learning_rate=args.base_learning_rate, buffer_size=500,
def main(dataset_name='PNAS',
         model_name='vgg16',
         experiment_dir=r'/media/data/jacob/Fossil_Project/vgg16/PNAS',
         gpu_ids=None,
         tfrecord_root_dir=r'/media/data/jacob/Fossil_Project/tfrecord_data',
         batch_size=64,
         target_size=(224, 224),
         base_learning_rate=0.001,
         num_epochs=100,
         preprocessing='imagenet',
         augment_images=False):
    """Run one training experiment end to end and return its Keras History.

    Builds a DatasetConfig/TrainConfig/ExperimentConfig triple, obtains data
    loaders and model parameters through ``BaseTrainer``, trains with
    ``model.fit`` and returns the resulting History object.

    Args:
        dataset_name: Dataset key understood by ``DatasetConfig``.
        model_name: Architecture key understood by ``TrainConfig``/``build_model``.
        experiment_dir: Output directory for ``weights_best.h5`` and the
            TensorBoard ``logdir``.
        gpu_ids: GPU indices for the run; defaults to ``[0]``.  Not consumed
            in this body — presumably read elsewhere; TODO confirm.
        tfrecord_root_dir: Root directory holding the TFRecord shards.
        batch_size: Per-step batch size.
        target_size: (height, width) images are resized to.
        base_learning_rate: Initial learning rate.
        num_epochs: Number of training epochs.
        preprocessing: Preprocessing mode forwarded to ``TrainConfig``
            (e.g. ``'imagenet'``).
        augment_images: Whether ``TrainConfig`` enables image augmentation.

    Returns:
        The History object returned by ``model.fit``.
    """
    # BUG FIX: the old signature used the mutable default gpu_ids=[0], which
    # is shared across calls; keep the same effective default via a sentinel.
    if gpu_ids is None:
        gpu_ids = [0]

    reset_eager_session()
    # tf.reset_default_graph()
    dataset_config = DatasetConfig(dataset_name=dataset_name,
                                   label_col='family',
                                   target_size=target_size,
                                   channels=3,
                                   low_class_count_thresh=3,
                                   data_splits={'val_size': 0.2,
                                                'test_size': 0.2},
                                   tfrecord_root_dir=tfrecord_root_dir,
                                   num_shards=10)
    train_config = TrainConfig(model_name=model_name,
                               batch_size=batch_size,
                               frozen_layers=(0, -4),
                               base_learning_rate=base_learning_rate,
                               buffer_size=1000,
                               num_epochs=num_epochs,
                               preprocessing=preprocessing,
                               augment_images=augment_images,
                               seed=3)
    experiment_config = ExperimentConfig(dataset_config=dataset_config,
                                         train_config=train_config)

    ############################################
    # TODO: Move config definitions outside main() for:
    # 1. simplifying overall logic in main & segregating configuration to the
    #    section marked by if __name__ == '__main__'
    # 2. moving towards defining most or all run parameters in separate
    #    config files
    ############################################

    def build_tunable_model(hp):
        """Build a model whose architecture and LR are sampled by the tuner."""
        return build_model(
            name=hp.Choice('name',
                           values=['shallow', 'vgg16', 'xception',
                                   'resnet_50_v2', 'resnet_101_v2']),
            num_classes=10000,
            frozen_layers=(0, -4),
            input_shape=(224, 224, 3),
            # BUG FIX: was `max__value` (double underscore), which raised a
            # TypeError as soon as the tuner sampled this hyperparameter.
            base_learning_rate=hp.Float('base_learning_rate',
                                        min_value=1e-6,
                                        max_value=0.01,
                                        sampling='log'))

    # NOTE(review): the original code unconditionally called RandomSearch(...)
    # here with the undefined names `seed` and `**kwargs` (NameError before
    # any training could run) and discarded the result.  Disabled until the
    # tuner integration is actually wired up:
    # RandomSearch(build_tunable_model,
    #              objective='val_accuracy',
    #              max_trials=num_epochs,
    #              seed=seed,
    #              hyperparameters=None,
    #              tune_new_entries=True,
    #              allow_new_entries=True,
    #              **kwargs)

    trainer = BaseTrainer(experiment_config=experiment_config)
    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    # Built for parity with train/val but not consumed below — presumably the
    # loader call has side effects worth keeping; TODO confirm.
    test_data = trainer.get_data_loader(subset='test')
    # AUTOTUNE = tf.data.experimental.AUTOTUNE
    # train_data = tfds.load("mnist", split='train').shuffle(1000).batch(batch_size).repeat().prefetch(AUTOTUNE)

    model_params = trainer.get_model_params('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir, 'weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'logdir'),
        restore_best_weights=False)

    # name='shallow', num_classes=10000, frozen_layers=(0,-4),
    # input_shape=(224,224,3), base_learning_rate=0.0001
    model = build_model(**model_params)

    history = model.fit(train_data,
                        steps_per_epoch=fit_params['steps_per_epoch'],
                        epochs=fit_params['epochs'],
                        validation_data=val_data,
                        validation_steps=fit_params['validation_steps'],
                        callbacks=callbacks)
    return history