def _average_training_metrics(
    combined_timeseries: Dict[str, Any], combined_num_batches: List[int]
) -> List[Dict[str, Any]]:
    """Average combined training metrics across GPUs.

    Args:
        combined_timeseries: Maps each metric name to its per-process series.
            A measurement is read as
            ``combined_timeseries[metric_name][process_idx][batch_idx]``.
        combined_num_batches: One batch count per process. Its length is used
            as the number of processes and its first entry as the number of
            batches for every process.

    Returns:
        ``util._dict_to_list`` applied to the mapping
        ``metric_name -> [average for batch 0, average for batch 1, ...]``.
    """
    # If the value for a metric is a single-element array, the averaging process will
    # change that into just the element. We record what metrics are single-element arrays
    # so we can wrap them in an array later (for perfect compatibility with non-averaging
    # codepath). The length checks keep a zero-batch input from raising IndexError here.
    array_metrics = {
        metric_name
        for metric_name, process_batches in combined_timeseries.items()
        if len(process_batches) > 0
        and len(process_batches[0]) > 0
        and isinstance(process_batches[0][0], np.ndarray)
    }

    num_batches = combined_num_batches[0]  # num_batches matches across data parallel ranks.
    num_processes = len(combined_num_batches)

    averaged_metrics_timeseries = {}  # type: Dict[str, List]
    for metric_name, process_batches in combined_timeseries.items():
        wrap_in_array = metric_name in array_metrics
        averaged = []  # type: List[Any]
        for batch_idx in range(num_batches):
            # Drop missing measurements *before* handing the values to NumPy: a mix of
            # ndarrays and None is a ragged sequence, which np.array() refuses to build
            # on recent NumPy versions, and `array != None` is fragile on object arrays.
            values = [
                value
                for value in (
                    process_batches[process_idx][batch_idx]
                    for process_idx in range(num_processes)
                )
                if value is not None
            ]
            # np.mean of an empty list is NaN (with a RuntimeWarning), which is what a
            # batch with no reported values produces.
            batch_avg = np.mean(values)
            if wrap_in_array:
                batch_avg = np.array(batch_avg)
            averaged.append(batch_avg)
        averaged_metrics_timeseries[metric_name] = averaged

    return util._dict_to_list(averaged_metrics_timeseries)
def _average_training_metrics(
    self, per_batch_metrics: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Average training metrics across GPUs.

    Args:
        per_batch_metrics: This process's metrics, one dict per batch.

    Returns:
        On the chief (``self.is_chief``), the per-batch metrics averaged over
        all processes, as produced by ``util._dict_to_list``. On every other
        process, ``per_batch_metrics`` is returned unchanged.
    """
    check.true(self.hvd_config.use, "Can only average training metrics in multi-GPU training.")
    metrics_timeseries = util._list_to_dict(per_batch_metrics)

    # combined_timeseries is: dict[metric_name] -> 2d-array.
    # A measurement is accessed via combined_timeseries[metric_name][process_idx][batch_idx].
    combined_timeseries, _ = self._combine_metrics_across_processes(
        metrics_timeseries, num_batches=len(per_batch_metrics)
    )

    # If the value for a metric is a single-element array, the averaging process will
    # change that into just the element. We record what metrics are single-element arrays
    # so we can wrap them in an array later (for perfect compatibility with non-averaging
    # codepath).
    array_metrics = {
        metric_name
        for metric_name, value in per_batch_metrics[0].items()
        if isinstance(value, np.ndarray)
    }

    if self.is_chief:
        combined_timeseries_type = Dict[str, List[List[Any]]]
        combined_timeseries = cast(combined_timeseries_type, combined_timeseries)
        num_batches = len(per_batch_metrics)
        num_processes = hvd.size()
        averaged_metrics_timeseries = {}  # type: Dict[str, List]

        for metric_name, process_batches in combined_timeseries.items():
            wrap_in_array = metric_name in array_metrics
            averaged = []  # type: List[Any]
            for batch_idx in range(num_batches):
                # Drop missing measurements *before* handing the values to NumPy: a mix
                # of ndarrays and None is a ragged sequence, which np.array() refuses to
                # build on recent NumPy versions, and `array != None` is fragile on
                # object arrays.
                values = [
                    value
                    for value in (
                        process_batches[process_idx][batch_idx]
                        for process_idx in range(num_processes)
                    )
                    if value is not None
                ]
                # np.mean of an empty list is NaN (with a RuntimeWarning), which is what
                # a batch with no reported values produces.
                batch_avg = np.mean(values)
                if wrap_in_array:
                    batch_avg = np.array(batch_avg)
                averaged.append(batch_avg)
            averaged_metrics_timeseries[metric_name] = averaged

        per_batch_metrics = util._dict_to_list(averaged_metrics_timeseries)

    return per_batch_metrics
def test_dict_to_list() -> None:
    """Check that a dict of lists is turned into a list of per-index dicts."""
    columns = {"a": [1, 2], "b": [3, 4]}
    expected_rows = [{"a": 1, "b": 3}, {"a": 2, "b": 4}]
    assert _dict_to_list(columns) == expected_rows