def _standarize_feature_label_dataset(dataset, model): input_names = model.input_names output_names = model.output_names def _process_labels(ys): if isinstance(ys, dict): return { k: np.expand_dims(y, axis=-1) if y.ndim == 0 else y for k, y in ys.items() } elif isinstance(ys, list): return [ np.expand_dims(y, axis=-1) if y.ndim == 0 else y for y in ys ] elif isinstance(ys, tuple): return tuple( [np.expand_dims(y, axis=-1) if y.ndim == 0 else y for y in ys]) else: return np.expand_dims(ys, axis=-1) if ys.ndim == 0 else ys def _training_reorder(x, input_names, output_names): assert isinstance(x, tuple) return (_reorder(x[0], input_names), _reorder(x[1], output_names)) def _reorder(x, names): if isinstance(x, dict): return [x[name] for name in names] elif isinstance(x, list) or isinstance(x, tuple): return x else: return [x] rdd = dataset.rdd.map(lambda x: (x[0], _process_labels(x[1])))\ .map(lambda sample: _training_reorder(sample, input_names, output_names)) if dataset.val_rdd is not None: val_rdd = dataset.val_rdd.map(lambda x: (x[0], _process_labels(x[1])))\ .map(lambda sample: _training_reorder(sample, input_names, output_names)) else: val_rdd = None tensor_structure = _training_reorder(dataset.tensor_structure, input_names, output_names) new_dataset = TFNdarrayDataset(rdd, tensor_structure, dataset.batch_size, -1, dataset.hard_code_batch_size, val_rdd) new_dataset.batch_per_thread = dataset.batch_per_thread return new_dataset
def _standarize_feature_dataset(dataset, model): input_names = model.input_names def _reorder(x, names): if isinstance(x, dict): return [x[name] for name in names] elif isinstance(x, list): return x else: return [x] rdd = dataset.rdd.map(lambda sample: _reorder(sample, input_names)) feature_schema = _reorder(dataset.tensor_structure[0], input_names) dataset = TFNdarrayDataset(rdd, feature_schema, dataset.batch_size, -1, dataset.hard_code_batch_size) return dataset