def compile(self, optimizer='rmsprop', loss=None, metrics=None,
            loss_weights=None, sample_weight_mode=None,
            weighted_metrics=None, **kwargs):
    """Compile the wrapped model with a synchronous distributed optimizer.

    The optimizer may be an optimizer object, a serialized configuration
    ``dict`` or a class-name string. The latter two are converted through
    ``tf.keras.optimizers.deserialize``. The result is wrapped in a
    ``SynchDistributedOptimizer`` bound to ``self.group`` and stored in
    ``self.dist_optimizer``.

    Args:
        optimizer: Optimizer instance, serialized config dict, or class name.
        loss: Forwarded unchanged to ``self.model.compile``.
        metrics: Forwarded unchanged to ``self.model.compile``.
        loss_weights: Forwarded unchanged to ``self.model.compile``.
        sample_weight_mode: Forwarded unchanged to ``self.model.compile``.
        weighted_metrics: Forwarded unchanged to ``self.model.compile``.
        **kwargs: Extra compile options; passed through
            ``self._preprocess_compile_kwargs`` before being forwarded.

    Returns:
        Whatever ``self.model.compile`` returns.
    """
    self.done_broadcast = False
    self.compiled = True
    logger.info("[DataParallelModel] compile.")

    # A bare class name is shorthand for a config with empty settings, so it
    # is first normalized to the dict form and then deserialized like one.
    if isinstance(optimizer, str):
        optimizer = {'class_name': optimizer, 'config': {}}
    if isinstance(optimizer, dict):
        optimizer = tf.keras.optimizers.deserialize(optimizer)

    self.dist_optimizer = tnt.distributed_optimizers.SynchDistributedOptimizer(
        optimizer, group=self.group)

    compile_options = self._preprocess_compile_kwargs(kwargs)
    return self.model.compile(
        optimizer=self.dist_optimizer,
        loss=loss,
        metrics=metrics,
        loss_weights=loss_weights,
        sample_weight_mode=sample_weight_mode,
        weighted_metrics=weighted_metrics,
        **compile_options)
def distributed_batch(self, dataset, batch_size, micro_batch_size, apply_batch):
    """Prepare `dataset` for this rank and optionally re-batch it.

    Args:
        dataset: Input dataset of samples.
        batch_size: Global batch size.
        micro_batch_size: Batch size applied at the end when `apply_batch`
            is set.
        apply_batch: When truthy, the per-rank dataset is batched again with
            `micro_batch_size` through ``self.batching_info.apply``.

    Returns:
        The dataset produced by ``self._get_dataset_slice_per_rank``,
        re-batched if requested.
    """
    # NOTE(review): the explicit comparison is kept as-is; the type of
    # `drop_remainder` is not visible from this function.
    drops_remainder = self.batching_info.drop_remainder == True

    if drops_remainder:
        # Batch with the global batch size (dropping the incomplete tail),
        # then flatten back to individual samples.
        rebatched = self.batching_info.apply(dataset, new_batch_size=batch_size)
        dataset = rebatched.unbatch()

    # Remember the dataset before any padding is added to it.
    self._dataset = dataset

    if not drops_remainder:
        # pad final incomplete batch to have at least `num_ranks` samples, such that
        # each rank will have the same number of iterations within one epoch
        dataset = ds_helpers._pad_dataset_if_necessary(
            dataset,
            self.num_samples,
            batch_size,
            min_last_batch_size=self.num_ranks)

    dataset = self._get_dataset_slice_per_rank(dataset, batch_size, micro_batch_size)
    if apply_batch:
        dataset = self.batching_info.apply(dataset, new_batch_size=micro_batch_size)

    logger.info(
        f"Using batch size = {batch_size}, micro batch size = {micro_batch_size}."
    )
    return dataset
def fit(self, x=None, y=None, callbacks=None, validation_data=None, **kwargs):
    """Train the pipelined model on micro-batched datasets.

    Args:
        x: Training dataset; micro-batched before being passed on.
        y: Accepted for signature compatibility; not used in this method.
        callbacks: User callbacks, preprocessed for the pipelining strategy.
        validation_data: Optional validation dataset; micro-batched when
            truthy, otherwise ``None`` is forwarded.
        **kwargs: Forwarded to ``self.model.fit``; ``verbose`` is also read
            when preprocessing the callbacks.

    Returns:
        Whatever ``self.model.fit`` returns.
    """
    def _microbatched(dataset):
        # Same micro-batching settings for training and validation data.
        return self._get_microbatched_dataset(
            dataset=dataset,
            nano_batch_size=self.nano_batch_size,
            num_pipeline_stages=self.num_pipeline_stages)

    logger.info("[PartitionedModel] fit.")
    self._configure_rebuild(dataset=x)
    self._build_model_and_compile_if_necessary()

    processed_callbacks = utilities._preprocess_callbacks(
        callbacks,
        self.group,
        parallel_strategy=tnt.ParallelStrategy.PIPELINING,
        exec_type='fit',
        verbose=kwargs.get('verbose', None))

    train_ds = _microbatched(x)
    validation_ds = _microbatched(validation_data) if validation_data else None

    return self.model.fit(
        x=train_ds,
        callbacks=processed_callbacks,
        validation_data=validation_ds,
        **kwargs)
def load_model(filepath, compile=True, **kwargs):
    """Load a Keras model from `filepath` and wrap it in a `tnt.Model`.

    The wrapper is always created with the data-parallel strategy. When
    `compile` is truthy, the loaded model's optimizer is wrapped in a
    ``SynchDistributedOptimizer`` and installed on the wrapper. If that
    fails, the failure is logged and the wrapper is returned uncompiled.

    Args:
        filepath: Location passed to ``tf.keras.models.load_model``.
        compile: Forwarded to ``tf.keras.models.load_model``; also controls
            whether the distributed optimizer is set up.
        **kwargs: Forwarded to ``tf.keras.models.load_model``.

    Returns:
        The `tnt.Model` wrapping the loaded Keras model.
    """
    logger.debug("Load model from file: {}".format(filepath))
    keras_model = tf.keras.models.load_model(filepath, compile=compile, **kwargs)

    # FIXME load models with any type of parallelization strategy
    logger.warning("Loading model with the default `data parallel` strategy.")
    tnt_model = tnt.Model(keras_model, parallel_strategy=tnt.ParallelStrategy.DATA)

    if compile:
        # Best effort: a model saved without compile information cannot be
        # given a distributed optimizer. Only ordinary errors are treated
        # that way; KeyboardInterrupt/SystemExit must still propagate.
        try:
            tnt_optimizer = tnt.distributed_optimizers.SynchDistributedOptimizer(
                keras_model.optimizer, group=tnt_model.group)
            tnt_model.dist_optimizer = tnt_optimizer
            tnt_model._set_internal_optimizer(tnt_model.dist_optimizer)
            tnt_model.compiled = True
            tnt_model.done_broadcast = True

            if version_utils.tf_version_below_equal('2.1'):
                tnt_model.model._experimental_run_tf_function = False
                logger.info("Set `experimental_run_tf_function` to False.")
        except Exception:
            logger.info("The loaded model was not pre-compiled.")

    # Executed whether or not the optimizer setup succeeded.
    tnt_model.barrier.execute()
    return tnt_model
def evaluate(self, x=None, y=None, callbacks=None, tnt_micro_batch_size=None,
             tnt_distribute_dataset=True, **kwargs):
    """Evaluate the wrapped model, optionally distributing `x` across ranks.

    Args:
        x: Evaluation dataset.
        y: Passed to ``self._setup_for_execution`` only.
        callbacks: User callbacks, preprocessed for the data-parallel strategy.
        tnt_micro_batch_size: Micro-batch size requested by the user, handed
            to ``distribute_dataset_across_ranks``.
        tnt_distribute_dataset: When falsy, `x` is passed to the model as-is.
        **kwargs: Forwarded to ``self.model.evaluate``; ``verbose`` is also
            read when preprocessing the callbacks.

    Returns:
        Whatever ``self.model.evaluate`` returns.
    """
    self._setup_for_execution('evaluate', x, y, kwargs)
    processed_callbacks = utilities._preprocess_callbacks(
        callbacks,
        self.group,
        parallel_strategy=tnt.ParallelStrategy.DATA,
        exec_type='evaluate',
        verbose=kwargs.get('verbose', None))

    if not tnt_distribute_dataset:
        logger.info("Automatic dataset distribution is disabled.")
        return self.model.evaluate(x, callbacks=processed_callbacks, **kwargs)

    num_ranks = self.group.size
    group_rank = self.group.to_group_rank(self.rank)
    test_dataset = tnt.data.Dataset(
        dataset=x,
        num_ranks=num_ranks,
        rank=group_rank,
        shuffle_seed=self.default_shuffle_seed)
    x = test_dataset.distribute_dataset_across_ranks(
        user_micro_batch_size=tnt_micro_batch_size,
        is_training=False)
    self._validate_micro_batch_size_for_batch_normalization(
        test_dataset.micro_batch_size)

    return self.model.evaluate(x, callbacks=processed_callbacks, **kwargs)
def shuffle_with_seed(self, dataset, ds_kwargs):
    """Shuffle `dataset`, falling back to ``self.shuffle_seed`` if unseeded.

    Args:
        dataset: Object exposing ``shuffle(**kwargs)``.
        ds_kwargs: Keyword arguments for ``dataset.shuffle``. Updated in
            place with ``self.shuffle_seed`` when ``'seed'`` is missing
            or ``None``.

    Returns:
        The result of ``dataset.shuffle(**ds_kwargs)``.
    """
    requested_seed = ds_kwargs.get('seed')
    if requested_seed is None:
        logger.info(
            f"Shuffling with fixed shuffle seed {self.shuffle_seed}")
        ds_kwargs['seed'] = self.shuffle_seed
    else:
        logger.info(f"Shuffling with shuffle seed {requested_seed}")
    return dataset.shuffle(**ds_kwargs)
def save_model(model, filepath, **kwargs):
    """Save `model` to `filepath` through the model's own ``save`` method.

    Args:
        model: A `tnt.Model` or a `tf.keras.Model`.
        filepath: Destination handed to ``model.save``.
        **kwargs: Forwarded to ``model.save``.

    Raises:
        ValueError: If `model` is neither a `tnt.Model` nor a
            `tf.keras.Model`.
    """
    if isinstance(model, tnt.Model):
        logger.info("save model from instance of tnt.Model")
    elif isinstance(model, tf.keras.Model):
        logger.info("save model from instance of tf.keras.Model")
    else:
        # One message string: passing two positional strings to ValueError
        # makes the error render as a tuple instead of a sentence.
        raise ValueError("[tnt.models.save_model] `model` needs to be either "
                         "a `tf.keras.Model`, or a `tnt.Model`")
    model.save(filepath, **kwargs)
def from_config(cls, config, **kwargs):
    """Create an instance of `cls` from a Keras model configuration.

    Args:
        config: Configuration passed to ``tf.keras.Model.from_config``.
        **kwargs: Forwarded to ``tf.keras.Model.from_config``.

    Returns:
        ``cls(keras_model)`` for the Keras model rebuilt from `config`.

    Raises:
        RuntimeError: If ``tf.keras.Model.from_config`` fails. The original
            error is attached as the cause.
    """
    # Only the reconstruction itself is guarded. Ordinary errors are
    # translated; KeyboardInterrupt/SystemExit pass through untouched.
    try:
        keras_model = tf.keras.Model.from_config(config, **kwargs)
    except Exception as err:
        raise RuntimeError(
            "[tnt.Model.from_config] Cannot load model; provided "
            "configuration is neither a `keras.Model` nor a `tnt.Model`."
        ) from err
    logger.info("Loaded model from `keras.Model`.")
    return cls(keras_model)
def clone_model(model, **kwargs):
    """Clone `model` with Keras and wrap the clone in a `tnt.Model`.

    Args:
        model: A `tnt.Model` (its ``.model`` attribute is cloned) or a
            `tf.keras.Model`.
        **kwargs: Forwarded to ``tf.keras.models.clone_model``.

    Returns:
        A new `tnt.Model` wrapping the cloned Keras model.

    Raises:
        ValueError: If `model` is neither a `tnt.Model` nor a
            `tf.keras.Model`.
    """
    if isinstance(model, tnt.Model):
        keras_model = tf.keras.models.clone_model(model.model, **kwargs)
        logger.info("clone model from instance of tnt.Model")
    elif isinstance(model, tf.keras.Model):
        keras_model = tf.keras.models.clone_model(model, **kwargs)
        logger.info("clone model from instance of tf.keras.Model")
    else:
        # One message string: passing two positional strings to ValueError
        # makes the error render as a tuple instead of a sentence.
        raise ValueError("[tnt.models.clone_model] `model` needs to be either "
                         "a `tf.keras.Model`, or a `tnt.Model`")
    return tnt.Model(keras_model)
def _build_model_and_compile_if_necessary(self):
    """Build and compile the micro-batched model unless already built.

    When ``self.built`` is falsy, a model builder is created for the current
    ``self.nano_batch_size``, its model is stored in ``self.model`` and
    compiled with the parameters from
    ``self._get_partition_compile_params()``. ``self.built`` is then set.
    """
    if not self.built:
        logger.info(f"[PartitionedModel] Building pipelined model with nano_batch_size={self.nano_batch_size}")

        self.microbatched_model_builder = self._get_microbatched_model_builder(
            self.nano_batch_size)
        self.model = self.microbatched_model_builder.get_model()

        compile_kwargs = self._get_partition_compile_params()
        self.model.compile(**compile_kwargs)
        self.built = True
    else:
        logger.info(f"[PartitionedModel] Model already built with nano_batch_size={self.nano_batch_size}")
def clone_model(model, **kwargs):
    """Clone `model` with Keras and wrap the clone in a data-parallel model.

    Args:
        model: A ``tnt.strategy.parallel_model.ParallelModel`` (its
            ``.model`` attribute is cloned) or a `tf.keras.Model`.
        **kwargs: Forwarded to ``tf.keras.models.clone_model``.

    Returns:
        A `tnt.Model` wrapping the cloned Keras model, created with
        ``tnt.ParallelStrategy.DATA``.

    Raises:
        ValueError: If `model` is of neither supported type.
    """
    if isinstance(model, tnt.strategy.parallel_model.ParallelModel):
        keras_model = tf.keras.models.clone_model(model.model, **kwargs)
        logger.info("clone model from instance of tnt.Model")
    elif isinstance(model, tf.keras.Model):
        keras_model = tf.keras.models.clone_model(model, **kwargs)
        logger.info("clone model from instance of tf.keras.Model")
    else:
        # One message string: passing two positional strings to ValueError
        # makes the error render as a tuple instead of a sentence.
        raise ValueError("[tnt.models.clone_model] `model` needs to be either "
                         "a `tf.keras.Model`, or a `tnt.Model`")

    # FIXME load models with any type of parallelization strategy
    logger.warning("Loading model with the default `data parallel` strategy.")
    return tnt.Model(keras_model, parallel_strategy=tnt.ParallelStrategy.DATA)
def from_config(cls, config, **kwargs):
    """Create a data-parallel `tnt.Model` from a Sequential configuration.

    Args:
        config: Configuration passed to ``tf.keras.Sequential.from_config``.
        **kwargs: Forwarded to ``tf.keras.Sequential.from_config``.

    Returns:
        A `tnt.Model` wrapping the rebuilt model, created with
        ``tnt.ParallelStrategy.DATA``.

    Raises:
        RuntimeError: If ``tf.keras.Sequential.from_config`` fails. The
            original error is attached as the cause.
    """
    # Only the reconstruction itself is guarded. Ordinary errors are
    # translated; KeyboardInterrupt/SystemExit pass through untouched.
    try:
        keras_model = tf.keras.Sequential.from_config(config, **kwargs)
    except Exception as err:
        raise RuntimeError(
            "[tnt.keras.Sequential.from_config] Cannot load model; provided "
            "configuration is not a `keras.Sequential` model."
        ) from err
    logger.info("Loaded model from `keras.Sequential`.")

    # FIXME load models with any type of parallelization strategy
    logger.warning(
        "Loading model with the default `data parallel` strategy.")
    return tnt.Model(keras_model, parallel_strategy=tnt.ParallelStrategy.DATA)
def fit(self, x=None, y=None, callbacks=None, validation_data=None,
        tnt_micro_batch_size=None, tnt_validation_micro_batch_size=None,
        tnt_distribute_dataset=True, tnt_distribute_validation_dataset=True,
        **kwargs):
    """Train the wrapped model, optionally distributing datasets across ranks.

    Args:
        x: Training dataset.
        y: Passed to ``self._setup_for_execution`` only.
        callbacks: Forwarded to ``self.model.fit``.
        validation_data: Optional validation dataset.
        tnt_micro_batch_size: User micro-batch size for the training data.
        tnt_validation_micro_batch_size: User micro-batch size for the
            validation data.
        tnt_distribute_dataset: When falsy, `x` is passed on unchanged.
        tnt_distribute_validation_dataset: When falsy, `validation_data` is
            passed on unchanged.
        **kwargs: Forwarded to ``self.model.fit``; ``shuffle`` is always
            overwritten with ``False``.

    Returns:
        Whatever ``self.model.fit`` returns.
    """
    self._setup_for_execution('fit', x, y, callbacks, kwargs)

    if tnt_distribute_dataset:
        distributed_x = ds.DistributedDataset(
            dataset=x,
            num_ranks=self.comm_size,
            rank=self.rank,
            shuffle_seed=self.default_shuffle_seed)
        x = distributed_x.distribute_dataset_across_ranks(
            user_micro_batch_size=tnt_micro_batch_size,
            is_training=True)
    else:
        # The separating space matters: adjacent literals are concatenated
        # verbatim, which used to log "disabled.Make sure".
        logger.info(
            "Automatic dataset distribution is disabled. "
            "Make sure the dataset is sharded manually across ranks.")

    # Always switch off shuffling
    kwargs["shuffle"] = False

    if validation_data:
        if tnt_distribute_validation_dataset:
            distributed_validation_data = ds.DistributedDataset(
                dataset=validation_data,
                num_ranks=self.comm_size,
                rank=self.rank,
                shuffle_seed=self.default_shuffle_seed)
            validation_data = distributed_validation_data.distribute_dataset_across_ranks(
                user_micro_batch_size=tnt_validation_micro_batch_size,
                is_training=False)
        else:
            logger.info(
                "Automatic distribution for the validation dataset is disabled."
            )

    return self.model.fit(x,
                          validation_data=validation_data,
                          callbacks=callbacks,
                          **kwargs)
def _pad_dataset_if_necessary(dataset, num_samples, batch_size, min_last_batch_size):
    """Make sure the last batch holds at least `min_last_batch_size` samples.

    Args:
        dataset: Unbatched dataset of samples.
        num_samples: Number of samples in `dataset`.
        batch_size: Batch size the dataset will be split with.
        min_last_batch_size: Minimum number of samples required in the final
            batch.

    Returns:
        - `dataset` unchanged if there is no incomplete last batch, or if
          that batch is already large enough;
        - `dataset` truncated to the samples before the incomplete batch
          when ``version_utils.tf_version_below_equal('2.1')`` holds;
        - otherwise `dataset` with
          ``min_last_batch_size - last_batch_size`` padding samples
          appended.
    """
    last_batch_size = _get_last_incomplete_batch_size(num_samples, batch_size)
    if last_batch_size == 0:
        logger.debug(f"No padding required: number of samples {num_samples} is a multiple "
                     f"of the batch size {batch_size}.")
        return dataset

    logger.info(f"Incomplete last batch in the dataset: number of samples is "
                f"{last_batch_size} ( != batch size {batch_size}).")

    if version_utils.tf_version_below_equal('2.1'):
        num_samples_multiple = num_samples - last_batch_size
        # `warning` is used rather than its deprecated alias `warn`.
        logger.warning(f"Number of samples ({num_samples}) is not a multiple of batch size. "
                       f"This use case is not supported in TF v{version_utils.current_version()}. "
                       f"Dropping the last incomplete batch from the dataset, "
                       f"and proceeding with {num_samples_multiple} samples.")
        return dataset.take(num_samples_multiple)

    if last_batch_size < min_last_batch_size:
        logger.debug(f"Padding required for the last batch: number of samples is "
                     f"{last_batch_size} ( < min_batch_size {min_last_batch_size}).")

        # Create helper dataset that contains one full batch and one incomplete batch
        helper_dataset = dataset.take(min_last_batch_size + last_batch_size)
        helper_dataset = helper_dataset.batch(min_last_batch_size, drop_remainder=False)

        # If `padded_shape` is unspecified, all dimensions of all components
        # are padded to the maximum size in the batch.
        # The second batch in `helper_dataset` will now contain
        # `min_last_batch_size - last_batch_size` default-initialized samples.
        helper_dataset = helper_dataset.padded_batch(2)

        # Switch back to a list of samples instead of batches
        helper_dataset = helper_dataset.unbatch().unbatch()

        # Remaining samples in the dataset are those generated through padding
        padding_samples = helper_dataset.skip(min_last_batch_size + last_batch_size)
        dataset = dataset.concatenate(padding_samples)
        logger.info(f"[Rank {tnt.get_rank()}] Dataset padded with "
                    f"{min_last_batch_size - last_batch_size} samples.")
    return dataset
def predict(self, x=None, callbacks=None, tnt_micro_batch_size=None,
            tnt_distribute_dataset=True, **kwargs):
    """Run prediction, optionally distributing `x` across ranks first.

    Args:
        x: Input dataset.
        callbacks: Forwarded to ``self.model.predict``.
        tnt_micro_batch_size: User micro-batch size handed to
            ``distribute_dataset_across_ranks``.
        tnt_distribute_dataset: When falsy, `x` is passed on unchanged.
        **kwargs: Forwarded to ``self.model.predict``.

    Returns:
        Whatever ``self.model.predict`` returns.
    """
    self._setup_for_execution('predict', x, None, callbacks, kwargs)

    if not tnt_distribute_dataset:
        logger.info("Automatic dataset distribution is disabled.")
        return self.model.predict(x, callbacks=callbacks, **kwargs)

    test_dataset = ds.DistributedDataset(
        dataset=x,
        num_ranks=self.comm_size,
        rank=self.rank,
        shuffle_seed=self.default_shuffle_seed)
    per_rank_input = test_dataset.distribute_dataset_across_ranks(
        user_micro_batch_size=tnt_micro_batch_size,
        is_training=False)
    return self.model.predict(per_rank_input, callbacks=callbacks, **kwargs)
def compile(self, optimizer='rmsprop', loss=None, metrics=None,
            loss_weights=None, sample_weight_mode=None,
            weighted_metrics=None, **kwargs):
    """Record the compile arguments and compile the current model.

    The arguments are captured in a ``CompileProperties`` object stored in
    ``self.compile_properties``, and ``self.built`` is set to ``True``.

    Args:
        optimizer: Forwarded to ``self.model.compile``.
        loss: Forwarded to ``self.model.compile``.
        metrics: Forwarded to ``self.model.compile``.
        loss_weights: Forwarded to ``self.model.compile``.
        sample_weight_mode: Forwarded to ``self.model.compile``.
        weighted_metrics: Forwarded to ``self.model.compile``.
        **kwargs: Forwarded to ``self.model.compile``.

    Returns:
        Whatever ``self.model.compile`` returns.
    """
    self.built = True
    # Snapshot of the call arguments (`self`, the named parameters and
    # `kwargs`). It must be taken before any other local is introduced,
    # otherwise that local would end up in the snapshot as well.
    params = dict(locals())
    logger.info("[PartitionedModel] compile.")
    self.compile_properties = CompileProperties(self.model, params)

    # Forward by keyword so each value reaches the parameter of the same
    # name regardless of the positional order of `self.model.compile`.
    # NOTE(review): that order is believed to differ between TensorFlow
    # releases - confirm against the supported versions.
    return self.model.compile(optimizer=optimizer,
                              loss=loss,
                              metrics=metrics,
                              loss_weights=loss_weights,
                              sample_weight_mode=sample_weight_mode,
                              weighted_metrics=weighted_metrics,
                              **kwargs)
def distributed_batch(self, dataset, batch_size, micro_batch_size):
    """Re-batch `dataset` with `micro_batch_size` and shard it for this rank.

    Incomplete trailing batches are removed first: through the batching
    operation when ``drop_remainder`` is set, otherwise by truncating the
    dataset to a multiple of `batch_size`.

    Args:
        dataset: Input dataset of samples.
        batch_size: Global batch size.
        micro_batch_size: Batch size applied before sharding.

    Returns:
        The dataset batched with `micro_batch_size` and sharded with
        ``num_shards=self.num_ranks`` and ``index=self.rank``.

    Raises:
        ValueError: If the dataset is infinite (only checked when
            ``drop_remainder`` is not set).
    """
    if self.batching_info.drop_remainder == True:
        dataset = self.batching_info.apply(dataset, new_batch_size=batch_size)
        dataset = dataset.unbatch()
    else:  # no drop remainder
        num_samples = ds_helpers.get_num_samples(dataset)
        if num_samples == tf.data.experimental.INFINITE_CARDINALITY:
            raise ValueError("[DistributedDataset] Infinite dataset provided")

        # Total number of samples is not multiple of the batch size
        if num_samples % batch_size != 0:
            # The message is built from adjacent literals. A backslash
            # continuation inside the string would embed the next source
            # line's indentation in the logged text.
            logger.warning("Number of samples ({}) is not a multiple of batch size. "
                           "Removing the last incomplete batch from the dataset.".format(num_samples))
            num_samples_multiple = (num_samples // batch_size) * batch_size
            dataset = dataset.take(num_samples_multiple)

    dataset = self.batching_info.apply(dataset, new_batch_size=micro_batch_size)
    dataset = dataset.shard(num_shards=self.num_ranks, index=self.rank)

    logger.info("Using batch size = {}, micro batch size = {}.".format(
        batch_size, micro_batch_size))
    return dataset
def _create_tnt_model(cls, model: tf.keras.Model,
                      parallel_strategy: tnt.ParallelStrategy = (
                          tnt.ParallelStrategy.ALL if TF_DEFAULT_PIPELINING_FLAG
                          else tnt.ParallelStrategy.DATA),
                      num_pipeline_stages: int = 1):
    """Wrap `model` according to the requested parallelization strategy.

    Args:
        model: Keras model to parallelize.
        parallel_strategy: Combination of ``tnt.ParallelStrategy`` flags.
            Pipelining is dropped (with a warning) for
            `tf.keras.Sequential` models.
        num_pipeline_stages: Forwarded to ``pm.PartitionedModel``.

    Returns:
        `model`, wrapped in a ``pm.PartitionedModel`` when pipelining is
        active and additionally in a ``dpm.DataParallelModel`` when data
        parallelism is active.

    Raises:
        ValueError: If pipelining is used without data parallelism and the
            pipeline group size differs from ``tnt.get_size()``.
    """
    replica_group = tnt.Group()

    if (tnt.ParallelStrategy.PIPELINING in parallel_strategy) and isinstance(model, tf.keras.Sequential):
        # `warning` is used rather than its deprecated alias `warn`.
        logger.warning(
            "Cannot pipeline a `tf.keras.Sequential` model; disabling model parallelism."
        )
        # The flag is known to be set here, so XOR clears it.
        parallel_strategy = parallel_strategy ^ tnt.ParallelStrategy.PIPELINING

    logger.info(f"Creating parallel model using {parallel_strategy}.")
    if tnt.ParallelStrategy.PIPELINING in parallel_strategy:
        rank = tnt.get_rank()

        partition_generator = pgen.GraphPartitionGenerator(model)
        rank_mapper = rmapper.RankMapper(
            num_ranks=tnt.get_size(),
            pipeline_graph=partition_generator.get_pipeline_graph())
        pipeline_group = rank_mapper.get_pipelining_group_for_rank(rank)

        logger.info(
            f"[Pipelining] Creating pipelined model with {pipeline_group.size} partitions."
        )
        # get my partition
        model = pm.PartitionedModel(
            model=model,
            group=pipeline_group,
            partition_generator=partition_generator,
            rank_mapper=rank_mapper,
            num_pipeline_stages=num_pipeline_stages)

        if tnt.ParallelStrategy.DATA in parallel_strategy:
            replica_group = rank_mapper.get_replica_group_for_rank(rank)
        elif pipeline_group.size != tnt.get_size():
            raise ValueError(
                f"Provided model has only {pipeline_group.size} partitions; use {pipeline_group.size} ranks or a different parallel strategy."
            )

    if tnt.ParallelStrategy.DATA in parallel_strategy:
        # replicate my partition across the data parallel group
        logger.info(
            f"[DataParallel] Replicating local model across ranks {replica_group.group}."
        )
        model = dpm.DataParallelModel(model=model, group=replica_group)
    return model
def fit(self, x=None, y=None, callbacks=None, validation_data=None,
        tnt_micro_batch_size=None, tnt_validation_micro_batch_size=None,
        tnt_distribute_dataset=True, tnt_distribute_validation_dataset=True,
        **kwargs):
    """Train the wrapped model, optionally distributing datasets across ranks.

    Args:
        x: Training dataset.
        y: Passed to ``self._setup_for_execution`` only.
        callbacks: User callbacks, preprocessed for the data-parallel strategy.
        validation_data: Optional validation dataset.
        tnt_micro_batch_size: User micro-batch size for the training data.
        tnt_validation_micro_batch_size: User micro-batch size for the
            validation data.
        tnt_distribute_dataset: When falsy, `x` is passed on unchanged.
        tnt_distribute_validation_dataset: When falsy, `validation_data` is
            passed on unchanged.
        **kwargs: Forwarded to ``self.model.fit``; ``shuffle`` is always
            overwritten with ``False`` and ``verbose`` is read when
            preprocessing the callbacks.

    Returns:
        Whatever ``self.model.fit`` returns.
    """
    self._setup_for_execution('fit', x, y, kwargs)
    processed_callbacks = utilities._preprocess_callbacks(
        callbacks,
        self.group,
        parallel_strategy=tnt.ParallelStrategy.DATA,
        exec_type='fit',
        verbose=kwargs.get('verbose', None))

    if tnt_distribute_dataset:
        # Distribute dataset into micro-batches among ranks by taking into account
        # all possible cases of splitting the dataset:
        #
        # 1. Batch size
        # a. `batch_size` is a multiple of the number of ranks
        #    => identical `micro_batch_size` for all ranks
        # b. `batch_size` is not a multiple of the number of ranks
        #    => different ranks have different `micro_batch_size`s and
        #       locally computed gradients need to be scaled by a factor to
        #       account for the differences
        # c. `batch_size` < number of ranks
        #    => raise Error
        #
        # 2. Last batch within epoch
        # a. the last batch in the dataset is incomplete, but dataset is batched
        #    with `drop_remainder = True`
        #    => the last batch is dropped
        # b. the last batch in the dataset is incomplete with `drop_remainder = False`
        #    - number of samples in the last batch is smaller than `num_ranks`,
        #      => pad the dataset with a number of zeroed samples to ensure that each rank
        #         has one sample, so that they all see the same number of iterations in an epoch;
        #         the fake samples will be filtered out from the final gradient computation by
        #         assigning them `micro_batch_size = 0`
        #    - number of samples in the last batch is >= `num_ranks`
        #      => last batch can be considered a new `batch_size`, which will be handled as above (in 1.),
        #         both for computing the `micro_batch_size` and the `scaling_factor`
        distributed_x = tnt.data.Dataset(
            dataset=x,
            num_ranks=self.group.size,
            rank=self.group.to_group_rank(self.rank),
            shuffle_seed=self.default_shuffle_seed)
        x = distributed_x.distribute_dataset_across_ranks(
            user_micro_batch_size=tnt_micro_batch_size,
            is_training=True)
        self._validate_micro_batch_size_for_batch_normalization(
            distributed_x.micro_batch_size)

        # if different ranks have different micro-batch sizes, the gradients need rescaling
        dataset_callback = distributed_x.get_gradient_scaling_callback()
        if dataset_callback:
            processed_callbacks.append(dataset_callback)
    else:
        # The separating space matters: adjacent literals are concatenated
        # verbatim, which used to log "disabled.Make sure".
        logger.info("Automatic dataset distribution is disabled. "
                    "Make sure the dataset is sharded manually across ranks.")

    # Always switch off shuffling
    kwargs["shuffle"] = False

    if validation_data:
        if tnt_distribute_validation_dataset:
            distributed_validation_data = tnt.data.Dataset(
                dataset=validation_data,
                num_ranks=self.group.size,
                rank=self.group.to_group_rank(self.rank),
                shuffle_seed=self.default_shuffle_seed)
            validation_data = distributed_validation_data.distribute_dataset_across_ranks(
                user_micro_batch_size=tnt_validation_micro_batch_size,
                is_training=False)
            self._validate_micro_batch_size_for_batch_normalization(
                distributed_validation_data.micro_batch_size)
        else:
            logger.info("Automatic distribution for the validation dataset is disabled.")

    return self.model.fit(x=x,
                          validation_data=validation_data,
                          callbacks=processed_callbacks,
                          **kwargs)
def _preprocess_compile_kwargs(self, kwargs):
    """Adjust compile keyword arguments for the running TensorFlow version.

    Args:
        kwargs: Dictionary of compile options. Modified in place when
            ``version_utils.tf_version_below_equal('2.1')`` holds.

    Returns:
        The same `kwargs` dictionary, with ``experimental_run_tf_function``
        set to ``False`` in that case.
    """
    if not version_utils.tf_version_below_equal('2.1'):
        return kwargs

    kwargs['experimental_run_tf_function'] = False
    logger.info("Set `experimental_run_tf_function` to False.")
    return kwargs