def __init__(self,
             model_dir=None,
             initial_iter=0,
             tensorboard_every_n=0,
             **_unused):
    """
    Prepare the summary folder and register the handlers on the
    graph/iteration signals.

    :param model_dir: root model folder; the summary folder is looked up
        under its ``logs`` sub-directory
    :param initial_iter: starting iteration; a new summary subfolder is
        requested only when this equals 0
    :param tensorboard_every_n: stored unchanged on the instance
    :param _unused: any further keyword arguments are accepted and ignored
    """
    self.tensorboard_every_n = tensorboard_every_n

    # a fresh summary subfolder is requested unless we resume (fine-tune)
    logs_root = os.path.join(model_dir, 'logs')
    start_from_scratch = initial_iter == 0
    self.summary_dir = get_latest_subfolder(
        logs_root, create_new=start_from_scratch)

    # writers start unset
    self.writer_train = self.writer_valid = None

    # subscribe the handlers to the corresponding signals
    GRAPH_CREATED.connect(self.init_writer)
    ITER_STARTED.connect(self.read_tensorboard_op)
    ITER_FINISHED.connect(self.write_tensorboard)
def initialise_application(self, workflow_param, data_param):
    """
    Configure the driver from the user parameters and create the
    application instance.

    The ``SYSTEM``, ``NETWORK``, ``TRAINING``, ``INFERENCE`` and ``CUSTOM``
    sections are read from ``workflow_param``. Hardware, folder and
    iteration settings are stored on ``self``; the application named by
    the ``CUSTOM`` section is created, then its dataset loader and
    sampler are initialised.

    :param workflow_param: a dictionary of user parameters,
        keys correspond to sections in the config file
    :param data_param: a dictionary of input image parameters,
        keys correspond to data properties to be used by image_reader
    :return: None
    :raises AttributeError: if ``workflow_param`` has no ``get`` method
        (logged as fatal, then re-raised)
    :raises AssertionError: if the ``SYSTEM`` or ``CUSTOM`` section, or
        the section of the selected action, is missing, or if the model
        folder does not exist
    """
    try:
        system_param = workflow_param.get('SYSTEM', None)
        net_param = workflow_param.get('NETWORK', None)
        train_param = workflow_param.get('TRAINING', None)
        infer_param = workflow_param.get('INFERENCE', None)
        app_param = workflow_param.get('CUSTOM', None)
    except AttributeError:
        tf.logging.fatal('parameters should be dictionaries')
        raise

    # Without this check a missing SYSTEM section only shows up as an
    # opaque "'NoneType' object has no attribute 'model_dir'" error.
    assert system_param is not None, 'system parameters not specified'
    assert os.path.exists(system_param.model_dir), \
        'Model folder not exists {}'.format(system_param.model_dir)
    self.is_training = (system_param.action == "train")

    # hardware-related parameters:
    # outside training, one thread and at most one GPU are used
    if self.is_training:
        self.num_threads = max(system_param.num_threads, 1)
        self.num_gpus = system_param.num_gpus
    else:
        self.num_threads = 1
        self.num_gpus = min(system_param.num_gpus, 1)
    set_cuda_device(system_param.cuda_devices)

    # set output TF model folders
    self.model_dir = touch_folder(
        os.path.join(system_param.model_dir, 'models'))
    self.session_prefix = os.path.join(self.model_dir, FILE_PREFIX)

    if self.is_training:
        assert train_param, 'training parameters not specified'
        # a new summary subfolder is requested only when starting at 0
        summary_root = os.path.join(system_param.model_dir, 'logs')
        self.summary_dir = get_latest_subfolder(
            summary_root, create_new=train_param.starting_iter == 0)

        # training iterations-related parameters
        self.initial_iter = train_param.starting_iter
        self.final_iter = train_param.max_iter
        self.save_every_n = train_param.save_every_n
        self.tensorboard_every_n = train_param.tensorboard_every_n
        self.max_checkpoints = train_param.max_checkpoints
        self.gradients_collector = GradientsCollector(
            n_devices=max(self.num_gpus, 1))
        action_param = train_param
    else:
        assert infer_param, 'inference parameters not specified'
        self.initial_iter = infer_param.inference_iter
        action_param = infer_param

    # at least one device slot, even when no GPU is configured
    self.outputs_collector = OutputsCollector(
        n_devices=max(self.num_gpus, 1))

    # create an application instance
    assert app_param, 'application specific param. not specified'
    app_module = ApplicationDriver._create_app(app_param.name)
    self.app = app_module(net_param, action_param, self.is_training)

    # initialise data input
    self.app.initialise_dataset_loader(data_param, app_param)
    # pylint: disable=not-context-manager
    with self.graph.as_default(), tf.name_scope('Sampler'):
        self.app.initialise_sampler()
def initialise_application(self, workflow_param, data_param):
    """
    Configure the driver from the user parameters, create the
    application instance and set up the dataset partitioning.

    The ``SYSTEM``, ``NETWORK``, ``TRAINING``, ``INFERENCE`` and ``CUSTOM``
    sections are read from ``workflow_param``. Hardware, folder and
    iteration settings are stored on ``self``; the application named by
    the ``CUSTOM`` section is created; an ``ImageSetsPartitioner`` is
    prepared and handed to the application's dataset loader; finally the
    sampler is initialised.

    :param workflow_param: a dictionary of user parameters,
        keys correspond to sections in the config file
    :param data_param: a dictionary of input image parameters,
        keys correspond to data properties to be used by image_reader
    :return: None
    :raises AttributeError: if ``workflow_param`` has no ``get`` method
        (logged as fatal, then re-raised)
    :raises AssertionError: if the ``SYSTEM`` or ``CUSTOM`` section, or
        the section of the selected action, is missing, if the model
        folder does not exist, or if validation is requested but no
        train/validation split is available
    """
    try:
        system_param = workflow_param.get('SYSTEM', None)
        net_param = workflow_param.get('NETWORK', None)
        train_param = workflow_param.get('TRAINING', None)
        infer_param = workflow_param.get('INFERENCE', None)
        app_param = workflow_param.get('CUSTOM', None)
    except AttributeError:
        tf.logging.fatal('parameters should be dictionaries')
        raise

    # Without this check a missing SYSTEM section only shows up as an
    # opaque "'NoneType' object has no attribute 'model_dir'" error.
    assert system_param is not None, 'system parameters not specified'
    assert os.path.exists(system_param.model_dir), \
        'Model folder not exists {}'.format(system_param.model_dir)
    self.is_training = (system_param.action == "train")

    # hardware-related parameters:
    # outside training, one thread and at most one GPU are used
    if self.is_training:
        self.num_threads = max(system_param.num_threads, 1)
        self.num_gpus = system_param.num_gpus
    else:
        self.num_threads = 1
        self.num_gpus = min(system_param.num_gpus, 1)
    set_cuda_device(system_param.cuda_devices)

    # set output TF model folders
    self.model_dir = touch_folder(
        os.path.join(system_param.model_dir, 'models'))
    self.session_prefix = os.path.join(self.model_dir, FILE_PREFIX)

    # set training params.
    if self.is_training:
        assert train_param, 'training parameters not specified'
        # a new summary subfolder is requested only when starting at 0
        summary_root = os.path.join(system_param.model_dir, 'logs')
        self.summary_dir = get_latest_subfolder(
            summary_root, create_new=train_param.starting_iter == 0)

        self.initial_iter = train_param.starting_iter
        # the final iteration is never below the starting iteration
        self.final_iter = max(train_param.max_iter, self.initial_iter)
        self.save_every_n = train_param.save_every_n
        self.tensorboard_every_n = train_param.tensorboard_every_n
        # the value already on the instance acts as a lower bound
        self.max_checkpoints = \
            max(train_param.max_checkpoints, self.max_checkpoints)
        self.gradients_collector = GradientsCollector(
            n_devices=max(self.num_gpus, 1))
        self.validation_every_n = train_param.validation_every_n
        if self.validation_every_n > 0:
            self.validation_max_iter = max(self.validation_max_iter,
                                           train_param.validation_max_iter)
        action_param = train_param
    else:
        # set inference params.
        assert infer_param, 'inference parameters not specified'
        self.initial_iter = infer_param.inference_iter
        action_param = infer_param

    # at least one device slot, even when no GPU is configured
    self.outputs_collector = OutputsCollector(
        n_devices=max(self.num_gpus, 1))

    # create an application instance
    assert app_param, 'application specific param. not specified'
    app_module = ApplicationDriver._create_app(app_param.name)
    self.app = app_module(net_param, action_param, system_param.action)

    # initialise data input
    data_partitioner = ImageSetsPartitioner()
    # clear the cached file lists
    data_partitioner.reset()
    # A new partition is made only when training starts at iteration 0,
    # no split file exists yet, and some data fraction is to be excluded.
    do_new_partition = \
        self.is_training and self.initial_iter == 0 and \
        (not os.path.isfile(system_param.dataset_split_file)) and \
        (train_param.exclude_fraction_for_validation > 0 or
         train_param.exclude_fraction_for_inference > 0)
    data_fractions = None
    if do_new_partition:
        assert train_param.exclude_fraction_for_validation > 0 or \
            self.validation_every_n <= 0, \
            'validation_every_n is set to {}, ' \
            'but train/validation splitting not available,\nplease ' \
            'check "exclude_fraction_for_validation" in the config ' \
            'file (current config value: {}).'.format(
                self.validation_every_n,
                train_param.exclude_fraction_for_validation)
        data_fractions = (train_param.exclude_fraction_for_validation,
                          train_param.exclude_fraction_for_inference)
    if data_param:
        data_partitioner.initialise(
            data_param=data_param,
            new_partition=do_new_partition,
            ratios=data_fractions,
            data_split_file=system_param.dataset_split_file)

    # validation during training needs a validation subset
    if data_param and self.is_training and self.validation_every_n > 0:
        assert data_partitioner.has_validation, \
            'validation_every_n is set to {}, ' \
            'but train/validation splitting not available.\nPlease ' \
            'check dataset partition list {} ' \
            '(remove file to generate a new dataset partition). ' \
            'Or set validation_every_n to -1.'.format(
                self.validation_every_n, system_param.dataset_split_file)

    # initialise readers
    self.app.initialise_dataset_loader(
        data_param, app_param, data_partitioner)

    self._data_partitioner = data_partitioner

    # pylint: disable=not-context-manager
    with self.graph.as_default(), tf.name_scope('Sampler'):
        self.app.initialise_sampler()
def initialise_application(self, workflow_param, data_param):
    """
    Configure the driver from the user parameters, create the
    application instance and set up the dataset partitioning.

    The ``SYSTEM``, ``NETWORK``, ``TRAINING``, ``INFERENCE`` and ``CUSTOM``
    sections are read from ``workflow_param``. Hardware, folder and
    iteration settings are stored on ``self``; the application named by
    the ``CUSTOM`` section is created; an ``ImageSetsPartitioner`` is
    prepared and handed to the application's dataset loader; finally the
    sampler is initialised.

    :param workflow_param: a dictionary of user parameters,
        keys correspond to sections in the config file
    :param data_param: a dictionary of input image parameters,
        keys correspond to data properties to be used by image_reader
    :return: None
    :raises AttributeError: if ``workflow_param`` has no ``get`` method
        (logged as fatal, then re-raised)
    :raises AssertionError: if the ``SYSTEM`` or ``CUSTOM`` section, or
        the section of the selected action, is missing, if the model
        folder does not exist, or if validation is requested but no
        train/validation split is available
    """
    try:
        system_param = workflow_param.get('SYSTEM', None)
        net_param = workflow_param.get('NETWORK', None)
        train_param = workflow_param.get('TRAINING', None)
        infer_param = workflow_param.get('INFERENCE', None)
        app_param = workflow_param.get('CUSTOM', None)
    except AttributeError:
        tf.logging.fatal('parameters should be dictionaries')
        raise

    # Without this check a missing SYSTEM section only shows up as an
    # opaque "'NoneType' object has no attribute 'model_dir'" error.
    assert system_param is not None, 'system parameters not specified'
    assert os.path.exists(system_param.model_dir), \
        'Model folder not exists {}'.format(system_param.model_dir)
    self.is_training = (system_param.action == "train")

    # hardware-related parameters:
    # outside training, one thread and at most one GPU are used
    if self.is_training:
        self.num_threads = max(system_param.num_threads, 1)
        self.num_gpus = system_param.num_gpus
    else:
        self.num_threads = 1
        self.num_gpus = min(system_param.num_gpus, 1)
    set_cuda_device(system_param.cuda_devices)

    # set output TF model folders
    self.model_dir = touch_folder(
        os.path.join(system_param.model_dir, 'models'))
    self.session_prefix = os.path.join(self.model_dir, FILE_PREFIX)

    if self.is_training:
        assert train_param, 'training parameters not specified'
        # a new summary subfolder is requested only when starting at 0
        summary_root = os.path.join(system_param.model_dir, 'logs')
        self.summary_dir = get_latest_subfolder(
            summary_root, create_new=train_param.starting_iter == 0)

        self.initial_iter = train_param.starting_iter
        # the final iteration is never below the starting iteration
        self.final_iter = max(train_param.max_iter, self.initial_iter)
        self.save_every_n = train_param.save_every_n
        self.tensorboard_every_n = train_param.tensorboard_every_n
        # the value already on the instance acts as a lower bound
        self.max_checkpoints = \
            max(train_param.max_checkpoints, self.max_checkpoints)
        self.gradients_collector = GradientsCollector(
            n_devices=max(self.num_gpus, 1))
        self.validation_every_n = train_param.validation_every_n
        if self.validation_every_n > 0:
            self.validation_max_iter = max(self.validation_max_iter,
                                           train_param.validation_max_iter)
        action_param = train_param
    else:
        assert infer_param, 'inference parameters not specified'
        self.initial_iter = infer_param.inference_iter
        action_param = infer_param

    # at least one device slot, even when no GPU is configured
    self.outputs_collector = OutputsCollector(
        n_devices=max(self.num_gpus, 1))

    # create an application instance
    assert app_param, 'application specific param. not specified'
    app_module = ApplicationDriver._create_app(app_param.name)
    self.app = app_module(net_param, action_param, self.is_training)

    # initialise data input
    data_partitioner = ImageSetsPartitioner()
    # clear the cached file lists
    data_partitioner.reset()
    # A new partition is made only when training starts at iteration 0,
    # no split file exists yet, and some data fraction is to be excluded.
    do_new_partition = \
        self.is_training and self.initial_iter == 0 and \
        (not os.path.isfile(system_param.dataset_split_file)) and \
        (train_param.exclude_fraction_for_validation > 0 or
         train_param.exclude_fraction_for_inference > 0)
    data_fractions = None
    if do_new_partition:
        assert train_param.exclude_fraction_for_validation > 0 or \
            self.validation_every_n <= 0, \
            'validation_every_n is set to {}, ' \
            'but train/validation splitting not available,\nplease ' \
            'check "exclude_fraction_for_validation" in the config ' \
            'file (current config value: {}).'.format(
                self.validation_every_n,
                train_param.exclude_fraction_for_validation)
        data_fractions = (train_param.exclude_fraction_for_validation,
                          train_param.exclude_fraction_for_inference)
    if data_param:
        data_partitioner.initialise(
            data_param=data_param,
            new_partition=do_new_partition,
            ratios=data_fractions,
            data_split_file=system_param.dataset_split_file)

    # validation during training needs a validation subset
    if data_param and self.is_training and self.validation_every_n > 0:
        assert data_partitioner.has_validation, \
            'validation_every_n is set to {}, ' \
            'but train/validation splitting not available.\nPlease ' \
            'check dataset partition list {} ' \
            '(remove file to generate a new dataset partition). ' \
            'Or set validation_every_n to -1.'.format(
                self.validation_every_n, system_param.dataset_split_file)

    # initialise readers
    self.app.initialise_dataset_loader(data_param,
                                       app_param,
                                       data_partitioner)

    self._data_partitioner = data_partitioner

    # pylint: disable=not-context-manager
    with self.graph.as_default(), tf.name_scope('Sampler'):
        self.app.initialise_sampler()